Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfio-v5.9-rc2' of git://github.com/awilliam/linux-vfio

Pull VFIO fixes from Alex Williamson:

- Fix lockdep issue reported for recursive read-lock (Alex Williamson)

- Fix missing unwind in type1 replay function (Alex Williamson)

* tag 'vfio-v5.9-rc2' of git://github.com/awilliam/linux-vfio:
vfio/type1: Add proper error unwind for vfio_iommu_replay()
vfio-pci: Avoid recursive read-lock usage

+164 -29
+2
drivers/vfio/pci/vfio_pci_private.h
···
33 33
34 34 struct vfio_pci_ioeventfd {
35 35 	struct list_head next;
36 + 	struct vfio_pci_device *vdev;
36 37 	struct virqfd *virqfd;
37 38 	void __iomem *addr;
38 39 	uint64_t data;
39 40 	loff_t pos;
40 41 	int bar;
41 42 	int count;
43 + 	bool test_mem;
42 44 };
43 45
44 46 struct vfio_pci_irq_ctx {
+96 -24
drivers/vfio/pci/vfio_pci_rdwr.c
···
37 37 #define vfio_ioread8 ioread8
38 38 #define vfio_iowrite8 iowrite8
39 39
40 + #define VFIO_IOWRITE(size) \
41 + static int vfio_pci_iowrite##size(struct vfio_pci_device *vdev, \
42 + 			bool test_mem, u##size val, void __iomem *io) \
43 + { \
44 + 	if (test_mem) { \
45 + 		down_read(&vdev->memory_lock); \
46 + 		if (!__vfio_pci_memory_enabled(vdev)) { \
47 + 			up_read(&vdev->memory_lock); \
48 + 			return -EIO; \
49 + 		} \
50 + 	} \
51 + \
52 + 	vfio_iowrite##size(val, io); \
53 + \
54 + 	if (test_mem) \
55 + 		up_read(&vdev->memory_lock); \
56 + \
57 + 	return 0; \
58 + }
59 +
60 + VFIO_IOWRITE(8)
61 + VFIO_IOWRITE(16)
62 + VFIO_IOWRITE(32)
63 + #ifdef iowrite64
64 + VFIO_IOWRITE(64)
65 + #endif
66 +
67 + #define VFIO_IOREAD(size) \
68 + static int vfio_pci_ioread##size(struct vfio_pci_device *vdev, \
69 + 			bool test_mem, u##size *val, void __iomem *io) \
70 + { \
71 + 	if (test_mem) { \
72 + 		down_read(&vdev->memory_lock); \
73 + 		if (!__vfio_pci_memory_enabled(vdev)) { \
74 + 			up_read(&vdev->memory_lock); \
75 + 			return -EIO; \
76 + 		} \
77 + 	} \
78 + \
79 + 	*val = vfio_ioread##size(io); \
80 + \
81 + 	if (test_mem) \
82 + 		up_read(&vdev->memory_lock); \
83 + \
84 + 	return 0; \
85 + }
86 +
87 + VFIO_IOREAD(8)
88 + VFIO_IOREAD(16)
89 + VFIO_IOREAD(32)
90 +
40 91 /*
41 92  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
42 93  * range which is inaccessible. The excluded range drops writes and fills
43 94  * reads with -1. This is intended for handling MSI-X vector tables and
44 95  * leftover space for ROM BARs.
45 96  */
46 - static ssize_t do_io_rw(void __iomem *io, char __user *buf,
97 + static ssize_t do_io_rw(struct vfio_pci_device *vdev, bool test_mem,
98 + 			void __iomem *io, char __user *buf,
47 99 			loff_t off, size_t count, size_t x_start,
48 100 			size_t x_end, bool iswrite)
49 101 {
50 102 	ssize_t done = 0;
103 + 	int ret;
51 104
52 105 	while (count) {
53 106 		size_t fillable, filled;
···
119 66 				if (copy_from_user(&val, buf, 4))
120 67 					return -EFAULT;
121 68
122 - 				vfio_iowrite32(val, io + off);
69 + 				ret = vfio_pci_iowrite32(vdev, test_mem,
70 + 							 val, io + off);
71 + 				if (ret)
72 + 					return ret;
123 73 			} else {
124 - 				val = vfio_ioread32(io + off);
74 + 				ret = vfio_pci_ioread32(vdev, test_mem,
75 + 							&val, io + off);
76 + 				if (ret)
77 + 					return ret;
125 78
126 79 				if (copy_to_user(buf, &val, 4))
127 80 					return -EFAULT;
···
141 82 				if (copy_from_user(&val, buf, 2))
142 83 					return -EFAULT;
143 84
144 - 				vfio_iowrite16(val, io + off);
85 + 				ret = vfio_pci_iowrite16(vdev, test_mem,
86 + 							 val, io + off);
87 + 				if (ret)
88 + 					return ret;
145 89 			} else {
146 - 				val = vfio_ioread16(io + off);
90 + 				ret = vfio_pci_ioread16(vdev, test_mem,
91 + 							&val, io + off);
92 + 				if (ret)
93 + 					return ret;
147 94
148 95 				if (copy_to_user(buf, &val, 2))
149 96 					return -EFAULT;
···
163 98 				if (copy_from_user(&val, buf, 1))
164 99 					return -EFAULT;
165 100
166 - 				vfio_iowrite8(val, io + off);
101 + 				ret = vfio_pci_iowrite8(vdev, test_mem,
102 + 							val, io + off);
103 + 				if (ret)
104 + 					return ret;
167 105 			} else {
168 - 				val = vfio_ioread8(io + off);
106 + 				ret = vfio_pci_ioread8(vdev, test_mem,
107 + 						       &val, io + off);
108 + 				if (ret)
109 + 					return ret;
169 110
170 111 				if (copy_to_user(buf, &val, 1))
171 112 					return -EFAULT;
···
249 178
250 179 	count = min(count, (size_t)(end - pos));
251 180
252 - 	if (res->flags & IORESOURCE_MEM) {
253 - 		down_read(&vdev->memory_lock);
254 - 		if (!__vfio_pci_memory_enabled(vdev)) {
255 - 			up_read(&vdev->memory_lock);
256 - 			return -EIO;
257 - 		}
258 - 	}
259 -
260 181 	if (bar == PCI_ROM_RESOURCE) {
261 182 		/*
262 183 		 * The ROM can fill less space than the BAR, so we start the
···
276 213 		x_end = vdev->msix_offset + vdev->msix_size;
277 214 	}
278 215
279 - 	done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
216 + 	done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
217 + 			count, x_start, x_end, iswrite);
280 218
281 219 	if (done >= 0)
282 220 		*ppos += done;
···
285 221 	if (bar == PCI_ROM_RESOURCE)
286 222 		pci_unmap_rom(pdev, io);
287 223 out:
288 - 	if (res->flags & IORESOURCE_MEM)
289 - 		up_read(&vdev->memory_lock);
290 -
291 224 	return done;
292 225 }
293 226
···
339 278 		return ret;
340 279 	}
341 280
342 - 	done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);
281 + 	/*
282 + 	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
283 + 	 * probing, so we don't currently worry about access in relation
284 + 	 * to the memory enable bit in the command register.
285 + 	 */
286 + 	done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);
343 287
344 288 	vga_put(vdev->pdev, rsrc);
345 289
···
362 296
363 297 	switch (ioeventfd->count) {
364 298 	case 1:
365 - 		vfio_iowrite8(ioeventfd->data, ioeventfd->addr);
299 + 		vfio_pci_iowrite8(ioeventfd->vdev, ioeventfd->test_mem,
300 + 				  ioeventfd->data, ioeventfd->addr);
366 301 		break;
367 302 	case 2:
368 - 		vfio_iowrite16(ioeventfd->data, ioeventfd->addr);
303 + 		vfio_pci_iowrite16(ioeventfd->vdev, ioeventfd->test_mem,
304 + 				   ioeventfd->data, ioeventfd->addr);
369 305 		break;
370 306 	case 4:
371 - 		vfio_iowrite32(ioeventfd->data, ioeventfd->addr);
307 + 		vfio_pci_iowrite32(ioeventfd->vdev, ioeventfd->test_mem,
308 + 				   ioeventfd->data, ioeventfd->addr);
372 309 		break;
373 310 #ifdef iowrite64
374 311 	case 8:
375 - 		vfio_iowrite64(ioeventfd->data, ioeventfd->addr);
312 + 		vfio_pci_iowrite64(ioeventfd->vdev, ioeventfd->test_mem,
313 + 				   ioeventfd->data, ioeventfd->addr);
376 314 		break;
377 315 #endif
378 316 	}
···
448 378 		goto out_unlock;
449 379 	}
450 380
381 + 	ioeventfd->vdev = vdev;
451 382 	ioeventfd->addr = vdev->barmap[bar] + pos;
452 383 	ioeventfd->data = data;
453 384 	ioeventfd->pos = pos;
454 385 	ioeventfd->bar = bar;
455 386 	ioeventfd->count = count;
387 + 	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;
456 388
457 389 	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
458 390 				 NULL, NULL, &ioeventfd->virqfd, fd);
+66 -5
drivers/vfio/vfio_iommu_type1.c
···
1424 1424 static int vfio_iommu_replay(struct vfio_iommu *iommu,
1425 1425 			     struct vfio_domain *domain)
1426 1426 {
1427 - 	struct vfio_domain *d;
1427 + 	struct vfio_domain *d = NULL;
1428 1428 	struct rb_node *n;
1429 1429 	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1430 1430 	int ret;
1431 1431
1432 1432 	/* Arbitrarily pick the first domain in the list for lookups */
1433 - 	d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
1433 + 	if (!list_empty(&iommu->domain_list))
1434 + 		d = list_first_entry(&iommu->domain_list,
1435 + 				     struct vfio_domain, next);
1436 +
1434 1437 	n = rb_first(&iommu->dma_list);
1435 1438
1436 1439 	for (; n; n = rb_next(n)) {
···
1450 1447 			if (dma->iommu_mapped) {
1451 1448 				phys_addr_t p;
1452 1449 				dma_addr_t i;
1450 +
1451 + 				if (WARN_ON(!d)) { /* mapped w/o a domain?! */
1452 + 					ret = -EINVAL;
1453 + 					goto unwind;
1454 + 				}
1453 1455
1454 1456 				phys = iommu_iova_to_phys(d->domain, iova);
1455 1457
···
1485 1477 				if (npage <= 0) {
1486 1478 					WARN_ON(!npage);
1487 1479 					ret = (int)npage;
1488 - 					return ret;
1480 + 					goto unwind;
1489 1481 				}
1490 1482
1491 1483 				phys = pfn << PAGE_SHIFT;
···
1494 1486
1495 1487 			ret = iommu_map(domain->domain, iova, phys,
1496 1488 					size, dma->prot | domain->prot);
1497 - 			if (ret)
1498 - 				return ret;
1489 + 			if (ret) {
1490 + 				if (!dma->iommu_mapped)
1491 + 					vfio_unpin_pages_remote(dma, iova,
1492 + 							phys >> PAGE_SHIFT,
1493 + 							size >> PAGE_SHIFT,
1494 + 							true);
1495 + 				goto unwind;
1496 + 			}
1499 1497
1500 1498 			iova += size;
1501 1499 		}
1500 + 	}
1501 +
1502 + 	/* All dmas are now mapped, defer to second tree walk for unwind */
1503 + 	for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
1504 + 		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
1505 +
1502 1506 		dma->iommu_mapped = true;
1503 1507 	}
1508 +
1504 1509 	return 0;
1510 +
1511 + unwind:
1512 + 	for (; n; n = rb_prev(n)) {
1513 + 		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
1514 + 		dma_addr_t iova;
1515 +
1516 + 		if (dma->iommu_mapped) {
1517 + 			iommu_unmap(domain->domain, dma->iova, dma->size);
1518 + 			continue;
1519 + 		}
1520 +
1521 + 		iova = dma->iova;
1522 + 		while (iova < dma->iova + dma->size) {
1523 + 			phys_addr_t phys, p;
1524 + 			size_t size;
1525 + 			dma_addr_t i;
1526 +
1527 + 			phys = iommu_iova_to_phys(domain->domain, iova);
1528 + 			if (!phys) {
1529 + 				iova += PAGE_SIZE;
1530 + 				continue;
1531 + 			}
1532 +
1533 + 			size = PAGE_SIZE;
1534 + 			p = phys + size;
1535 + 			i = iova + size;
1536 + 			while (i < dma->iova + dma->size &&
1537 + 			       p == iommu_iova_to_phys(domain->domain, i)) {
1538 + 				size += PAGE_SIZE;
1539 + 				p += PAGE_SIZE;
1540 + 				i += PAGE_SIZE;
1541 + 			}
1542 +
1543 + 			iommu_unmap(domain->domain, iova, size);
1544 + 			vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT,
1545 + 						size >> PAGE_SHIFT, true);
1546 + 		}
1547 + 	}
1548 +
1549 + 	return ret;
1505 1550 }
1506 1551
1507 1552 /*