Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfio-v5.7-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

- vfio-pci SR-IOV support (Alex Williamson)

- vfio DMA read/write interface (Yan Zhao)

- Fix vfio-platform erroneous IRQ error log (Eric Auger)

- Fix shared ATSD support for NVLink on POWER (Sam Bobroff)

- Fix init error without CONFIG_IOMMU_DMA (Andre Przywara)

* tag 'vfio-v5.7-rc1' of git://github.com/awilliam/linux-vfio:
vfio: Ignore -ENODEV when getting MSI cookie
vfio-pci/nvlink2: Allow fallback to ibm,mmio-atsd[0]
vfio/pci: Cleanup .probe() exit paths
vfio/pci: Remove dev_fmt definition
vfio/pci: Add sriov_configure support
vfio: Introduce VFIO_DEVICE_FEATURE ioctl and first user
vfio/pci: Introduce VF token
vfio/pci: Implement match ops
vfio: Include optional device match in vfio_device_ops callbacks
vfio: avoid inefficient operations on VFIO group in vfio_pin/unpin_pages
vfio: introduce vfio_dma_rw to read/write a range of IOVAs
vfio: allow external user to get vfio group from device
vfio: platform: Switch to platform_get_irq_optional()

+710 -32
+366 -24
drivers/vfio/pci/vfio_pci.c
··· 9 9 */ 10 10 11 11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12 - #define dev_fmt pr_fmt 13 12 14 13 #include <linux/device.h> 15 14 #include <linux/eventfd.h> ··· 52 53 module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR); 53 54 MODULE_PARM_DESC(disable_idle_d3, 54 55 "Disable using the PCI D3 low power state for idle, unused devices"); 56 + 57 + static bool enable_sriov; 58 + #ifdef CONFIG_PCI_IOV 59 + module_param(enable_sriov, bool, 0644); 60 + MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration. Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF."); 61 + #endif 55 62 56 63 static inline bool vfio_vga_disabled(void) 57 64 { ··· 471 466 vfio_pci_set_power_state(vdev, PCI_D3hot); 472 467 } 473 468 469 + static struct pci_driver vfio_pci_driver; 470 + 471 + static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev, 472 + struct vfio_device **pf_dev) 473 + { 474 + struct pci_dev *physfn = pci_physfn(vdev->pdev); 475 + 476 + if (!vdev->pdev->is_virtfn) 477 + return NULL; 478 + 479 + *pf_dev = vfio_device_get_from_dev(&physfn->dev); 480 + if (!*pf_dev) 481 + return NULL; 482 + 483 + if (pci_dev_driver(physfn) != &vfio_pci_driver) { 484 + vfio_device_put(*pf_dev); 485 + return NULL; 486 + } 487 + 488 + return vfio_device_data(*pf_dev); 489 + } 490 + 491 + static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val) 492 + { 493 + struct vfio_device *pf_dev; 494 + struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev); 495 + 496 + if (!pf_vdev) 497 + return; 498 + 499 + mutex_lock(&pf_vdev->vf_token->lock); 500 + pf_vdev->vf_token->users += val; 501 + WARN_ON(pf_vdev->vf_token->users < 0); 502 + mutex_unlock(&pf_vdev->vf_token->lock); 503 + 504 + vfio_device_put(pf_dev); 505 + } 506 + 474 507 static void vfio_pci_release(void *device_data) 475 508 { 476 509 struct vfio_pci_device *vdev = device_data; ··· 
516 473 mutex_lock(&vdev->reflck->lock); 517 474 518 475 if (!(--vdev->refcnt)) { 476 + vfio_pci_vf_token_user_add(vdev, -1); 519 477 vfio_spapr_pci_eeh_release(vdev->pdev); 520 478 vfio_pci_disable(vdev); 521 479 } ··· 542 498 goto error; 543 499 544 500 vfio_spapr_pci_eeh_open(vdev->pdev); 501 + vfio_pci_vf_token_user_add(vdev, 1); 545 502 } 546 503 vdev->refcnt++; 547 504 error: ··· 1185 1140 1186 1141 return vfio_pci_ioeventfd(vdev, ioeventfd.offset, 1187 1142 ioeventfd.data, count, ioeventfd.fd); 1143 + } else if (cmd == VFIO_DEVICE_FEATURE) { 1144 + struct vfio_device_feature feature; 1145 + uuid_t uuid; 1146 + 1147 + minsz = offsetofend(struct vfio_device_feature, flags); 1148 + 1149 + if (copy_from_user(&feature, (void __user *)arg, minsz)) 1150 + return -EFAULT; 1151 + 1152 + if (feature.argsz < minsz) 1153 + return -EINVAL; 1154 + 1155 + /* Check unknown flags */ 1156 + if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK | 1157 + VFIO_DEVICE_FEATURE_SET | 1158 + VFIO_DEVICE_FEATURE_GET | 1159 + VFIO_DEVICE_FEATURE_PROBE)) 1160 + return -EINVAL; 1161 + 1162 + /* GET & SET are mutually exclusive except with PROBE */ 1163 + if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) && 1164 + (feature.flags & VFIO_DEVICE_FEATURE_SET) && 1165 + (feature.flags & VFIO_DEVICE_FEATURE_GET)) 1166 + return -EINVAL; 1167 + 1168 + switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) { 1169 + case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN: 1170 + if (!vdev->vf_token) 1171 + return -ENOTTY; 1172 + 1173 + /* 1174 + * We do not support GET of the VF Token UUID as this 1175 + * could expose the token of the previous device user. 
1176 + */ 1177 + if (feature.flags & VFIO_DEVICE_FEATURE_GET) 1178 + return -EINVAL; 1179 + 1180 + if (feature.flags & VFIO_DEVICE_FEATURE_PROBE) 1181 + return 0; 1182 + 1183 + /* Don't SET unless told to do so */ 1184 + if (!(feature.flags & VFIO_DEVICE_FEATURE_SET)) 1185 + return -EINVAL; 1186 + 1187 + if (feature.argsz < minsz + sizeof(uuid)) 1188 + return -EINVAL; 1189 + 1190 + if (copy_from_user(&uuid, (void __user *)(arg + minsz), 1191 + sizeof(uuid))) 1192 + return -EFAULT; 1193 + 1194 + mutex_lock(&vdev->vf_token->lock); 1195 + uuid_copy(&vdev->vf_token->uuid, &uuid); 1196 + mutex_unlock(&vdev->vf_token->lock); 1197 + 1198 + return 0; 1199 + default: 1200 + return -ENOTTY; 1201 + } 1188 1202 } 1189 1203 1190 1204 return -ENOTTY; ··· 1382 1278 mutex_unlock(&vdev->igate); 1383 1279 } 1384 1280 1281 + static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev, 1282 + bool vf_token, uuid_t *uuid) 1283 + { 1284 + /* 1285 + * There's always some degree of trust or collaboration between SR-IOV 1286 + * PF and VFs, even if just that the PF hosts the SR-IOV capability and 1287 + * can disrupt VFs with a reset, but often the PF has more explicit 1288 + * access to deny service to the VF or access data passed through the 1289 + * VF. We therefore require an opt-in via a shared VF token (UUID) to 1290 + * represent this trust. This both prevents that a VF driver might 1291 + * assume the PF driver is a trusted, in-kernel driver, and also that 1292 + * a PF driver might be replaced with a rogue driver, unknown to in-use 1293 + * VF drivers. 
1294 + * 1295 + * Therefore when presented with a VF, if the PF is a vfio device and 1296 + * it is bound to the vfio-pci driver, the user needs to provide a VF 1297 + * token to access the device, in the form of appending a vf_token to 1298 + * the device name, for example: 1299 + * 1300 + * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3" 1301 + * 1302 + * When presented with a PF which has VFs in use, the user must also 1303 + * provide the current VF token to prove collaboration with existing 1304 + * VF users. If VFs are not in use, the VF token provided for the PF 1305 + * device will act to set the VF token. 1306 + * 1307 + * If the VF token is provided but unused, an error is generated. 1308 + */ 1309 + if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token) 1310 + return 0; /* No VF token provided or required */ 1311 + 1312 + if (vdev->pdev->is_virtfn) { 1313 + struct vfio_device *pf_dev; 1314 + struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev); 1315 + bool match; 1316 + 1317 + if (!pf_vdev) { 1318 + if (!vf_token) 1319 + return 0; /* PF is not vfio-pci, no VF token */ 1320 + 1321 + pci_info_ratelimited(vdev->pdev, 1322 + "VF token incorrectly provided, PF not bound to vfio-pci\n"); 1323 + return -EINVAL; 1324 + } 1325 + 1326 + if (!vf_token) { 1327 + vfio_device_put(pf_dev); 1328 + pci_info_ratelimited(vdev->pdev, 1329 + "VF token required to access device\n"); 1330 + return -EACCES; 1331 + } 1332 + 1333 + mutex_lock(&pf_vdev->vf_token->lock); 1334 + match = uuid_equal(uuid, &pf_vdev->vf_token->uuid); 1335 + mutex_unlock(&pf_vdev->vf_token->lock); 1336 + 1337 + vfio_device_put(pf_dev); 1338 + 1339 + if (!match) { 1340 + pci_info_ratelimited(vdev->pdev, 1341 + "Incorrect VF token provided for device\n"); 1342 + return -EACCES; 1343 + } 1344 + } else if (vdev->vf_token) { 1345 + mutex_lock(&vdev->vf_token->lock); 1346 + if (vdev->vf_token->users) { 1347 + if (!vf_token) { 1348 + mutex_unlock(&vdev->vf_token->lock); 1349 + 
pci_info_ratelimited(vdev->pdev, 1350 + "VF token required to access device\n"); 1351 + return -EACCES; 1352 + } 1353 + 1354 + if (!uuid_equal(uuid, &vdev->vf_token->uuid)) { 1355 + mutex_unlock(&vdev->vf_token->lock); 1356 + pci_info_ratelimited(vdev->pdev, 1357 + "Incorrect VF token provided for device\n"); 1358 + return -EACCES; 1359 + } 1360 + } else if (vf_token) { 1361 + uuid_copy(&vdev->vf_token->uuid, uuid); 1362 + } 1363 + 1364 + mutex_unlock(&vdev->vf_token->lock); 1365 + } else if (vf_token) { 1366 + pci_info_ratelimited(vdev->pdev, 1367 + "VF token incorrectly provided, not a PF or VF\n"); 1368 + return -EINVAL; 1369 + } 1370 + 1371 + return 0; 1372 + } 1373 + 1374 + #define VF_TOKEN_ARG "vf_token=" 1375 + 1376 + static int vfio_pci_match(void *device_data, char *buf) 1377 + { 1378 + struct vfio_pci_device *vdev = device_data; 1379 + bool vf_token = false; 1380 + uuid_t uuid; 1381 + int ret; 1382 + 1383 + if (strncmp(pci_name(vdev->pdev), buf, strlen(pci_name(vdev->pdev)))) 1384 + return 0; /* No match */ 1385 + 1386 + if (strlen(buf) > strlen(pci_name(vdev->pdev))) { 1387 + buf += strlen(pci_name(vdev->pdev)); 1388 + 1389 + if (*buf != ' ') 1390 + return 0; /* No match: non-whitespace after name */ 1391 + 1392 + while (*buf) { 1393 + if (*buf == ' ') { 1394 + buf++; 1395 + continue; 1396 + } 1397 + 1398 + if (!vf_token && !strncmp(buf, VF_TOKEN_ARG, 1399 + strlen(VF_TOKEN_ARG))) { 1400 + buf += strlen(VF_TOKEN_ARG); 1401 + 1402 + if (strlen(buf) < UUID_STRING_LEN) 1403 + return -EINVAL; 1404 + 1405 + ret = uuid_parse(buf, &uuid); 1406 + if (ret) 1407 + return ret; 1408 + 1409 + vf_token = true; 1410 + buf += UUID_STRING_LEN; 1411 + } else { 1412 + /* Unknown/duplicate option */ 1413 + return -EINVAL; 1414 + } 1415 + } 1416 + } 1417 + 1418 + ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid); 1419 + if (ret) 1420 + return ret; 1421 + 1422 + return 1; /* Match */ 1423 + } 1424 + 1385 1425 static const struct vfio_device_ops vfio_pci_ops = { 1386 
1426 .name = "vfio-pci", 1387 1427 .open = vfio_pci_open, ··· 1535 1287 .write = vfio_pci_write, 1536 1288 .mmap = vfio_pci_mmap, 1537 1289 .request = vfio_pci_request, 1290 + .match = vfio_pci_match, 1538 1291 }; 1539 1292 1540 1293 static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev); 1541 1294 static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck); 1295 + static struct pci_driver vfio_pci_driver; 1296 + 1297 + static int vfio_pci_bus_notifier(struct notifier_block *nb, 1298 + unsigned long action, void *data) 1299 + { 1300 + struct vfio_pci_device *vdev = container_of(nb, 1301 + struct vfio_pci_device, nb); 1302 + struct device *dev = data; 1303 + struct pci_dev *pdev = to_pci_dev(dev); 1304 + struct pci_dev *physfn = pci_physfn(pdev); 1305 + 1306 + if (action == BUS_NOTIFY_ADD_DEVICE && 1307 + pdev->is_virtfn && physfn == vdev->pdev) { 1308 + pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n", 1309 + pci_name(pdev)); 1310 + pdev->driver_override = kasprintf(GFP_KERNEL, "%s", 1311 + vfio_pci_ops.name); 1312 + } else if (action == BUS_NOTIFY_BOUND_DRIVER && 1313 + pdev->is_virtfn && physfn == vdev->pdev) { 1314 + struct pci_driver *drv = pci_dev_driver(pdev); 1315 + 1316 + if (drv && drv != &vfio_pci_driver) 1317 + pci_warn(vdev->pdev, 1318 + "VF %s bound to driver %s while PF bound to vfio-pci\n", 1319 + pci_name(pdev), drv->name); 1320 + } 1321 + 1322 + return 0; 1323 + } 1542 1324 1543 1325 static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 1544 1326 { ··· 1580 1302 return -EINVAL; 1581 1303 1582 1304 /* 1583 - * Prevent binding to PFs with VFs enabled, this too easily allows 1584 - * userspace instance with VFs and PFs from the same device, which 1585 - * cannot work. Disabling SR-IOV here would initiate removing the 1586 - * VFs, which would unbind the driver, which is prone to blocking 1587 - * if that VF is also in use by vfio-pci. Just reject these PFs 1588 - * and let the user sort it out. 
1305 + * Prevent binding to PFs with VFs enabled, the VFs might be in use 1306 + * by the host or other users. We cannot capture the VFs if they 1307 + * already exist, nor can we track VF users. Disabling SR-IOV here 1308 + * would initiate removing the VFs, which would unbind the driver, 1309 + * which is prone to blocking if that VF is also in use by vfio-pci. 1310 + * Just reject these PFs and let the user sort it out. 1589 1311 */ 1590 1312 if (pci_num_vf(pdev)) { 1591 1313 pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n"); ··· 1598 1320 1599 1321 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); 1600 1322 if (!vdev) { 1601 - vfio_iommu_group_put(group, &pdev->dev); 1602 - return -ENOMEM; 1323 + ret = -ENOMEM; 1324 + goto out_group_put; 1603 1325 } 1604 1326 1605 1327 vdev->pdev = pdev; ··· 1610 1332 INIT_LIST_HEAD(&vdev->ioeventfds_list); 1611 1333 1612 1334 ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); 1613 - if (ret) { 1614 - vfio_iommu_group_put(group, &pdev->dev); 1615 - kfree(vdev); 1616 - return ret; 1617 - } 1335 + if (ret) 1336 + goto out_free; 1618 1337 1619 1338 ret = vfio_pci_reflck_attach(vdev); 1620 - if (ret) { 1621 - vfio_del_group_dev(&pdev->dev); 1622 - vfio_iommu_group_put(group, &pdev->dev); 1623 - kfree(vdev); 1624 - return ret; 1339 + if (ret) 1340 + goto out_del_group_dev; 1341 + 1342 + if (pdev->is_physfn) { 1343 + vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL); 1344 + if (!vdev->vf_token) { 1345 + ret = -ENOMEM; 1346 + goto out_reflck; 1347 + } 1348 + 1349 + mutex_init(&vdev->vf_token->lock); 1350 + uuid_gen(&vdev->vf_token->uuid); 1351 + 1352 + vdev->nb.notifier_call = vfio_pci_bus_notifier; 1353 + ret = bus_register_notifier(&pci_bus_type, &vdev->nb); 1354 + if (ret) 1355 + goto out_vf_token; 1625 1356 } 1626 1357 1627 1358 if (vfio_pci_is_vga(pdev)) { ··· 1656 1369 } 1657 1370 1658 1371 return ret; 1372 + 1373 + out_vf_token: 1374 + kfree(vdev->vf_token); 1375 + out_reflck: 1376 + 
vfio_pci_reflck_put(vdev->reflck); 1377 + out_del_group_dev: 1378 + vfio_del_group_dev(&pdev->dev); 1379 + out_free: 1380 + kfree(vdev); 1381 + out_group_put: 1382 + vfio_iommu_group_put(group, &pdev->dev); 1383 + return ret; 1659 1384 } 1660 1385 1661 1386 static void vfio_pci_remove(struct pci_dev *pdev) 1662 1387 { 1663 1388 struct vfio_pci_device *vdev; 1664 1389 1390 + pci_disable_sriov(pdev); 1391 + 1665 1392 vdev = vfio_del_group_dev(&pdev->dev); 1666 1393 if (!vdev) 1667 1394 return; 1395 + 1396 + if (vdev->vf_token) { 1397 + WARN_ON(vdev->vf_token->users); 1398 + mutex_destroy(&vdev->vf_token->lock); 1399 + kfree(vdev->vf_token); 1400 + } 1401 + 1402 + if (vdev->nb.notifier_call) 1403 + bus_unregister_notifier(&pci_bus_type, &vdev->nb); 1668 1404 1669 1405 vfio_pci_reflck_put(vdev->reflck); 1670 1406 ··· 1737 1427 return PCI_ERS_RESULT_CAN_RECOVER; 1738 1428 } 1739 1429 1430 + static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) 1431 + { 1432 + struct vfio_pci_device *vdev; 1433 + struct vfio_device *device; 1434 + int ret = 0; 1435 + 1436 + might_sleep(); 1437 + 1438 + if (!enable_sriov) 1439 + return -ENOENT; 1440 + 1441 + device = vfio_device_get_from_dev(&pdev->dev); 1442 + if (!device) 1443 + return -ENODEV; 1444 + 1445 + vdev = vfio_device_data(device); 1446 + if (!vdev) { 1447 + vfio_device_put(device); 1448 + return -ENODEV; 1449 + } 1450 + 1451 + if (nr_virtfn == 0) 1452 + pci_disable_sriov(pdev); 1453 + else 1454 + ret = pci_enable_sriov(pdev, nr_virtfn); 1455 + 1456 + vfio_device_put(device); 1457 + 1458 + return ret < 0 ? 
ret : nr_virtfn; 1459 + } 1460 + 1740 1461 static const struct pci_error_handlers vfio_err_handlers = { 1741 1462 .error_detected = vfio_pci_aer_err_detected, 1742 1463 }; 1743 1464 1744 1465 static struct pci_driver vfio_pci_driver = { 1745 - .name = "vfio-pci", 1746 - .id_table = NULL, /* only dynamic ids */ 1747 - .probe = vfio_pci_probe, 1748 - .remove = vfio_pci_remove, 1749 - .err_handler = &vfio_err_handlers, 1466 + .name = "vfio-pci", 1467 + .id_table = NULL, /* only dynamic ids */ 1468 + .probe = vfio_pci_probe, 1469 + .remove = vfio_pci_remove, 1470 + .sriov_configure = vfio_pci_sriov_configure, 1471 + .err_handler = &vfio_err_handlers, 1750 1472 }; 1751 1473 1752 1474 static DEFINE_MUTEX(reflck_lock);
+8 -2
drivers/vfio/pci/vfio_pci_nvlink2.c
··· 422 422 423 423 if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", nvlink_index, 424 424 &mmio_atsd)) { 425 - dev_warn(&vdev->pdev->dev, "No available ATSD found\n"); 426 - mmio_atsd = 0; 425 + if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 0, 426 + &mmio_atsd)) { 427 + dev_warn(&vdev->pdev->dev, "No available ATSD found\n"); 428 + mmio_atsd = 0; 429 + } else { 430 + dev_warn(&vdev->pdev->dev, 431 + "Using fallback ibm,mmio-atsd[0] for ATSD.\n"); 432 + } 427 433 } 428 434 429 435 if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
+10
drivers/vfio/pci/vfio_pci_private.h
··· 12 12 #include <linux/pci.h> 13 13 #include <linux/irqbypass.h> 14 14 #include <linux/types.h> 15 + #include <linux/uuid.h> 16 + #include <linux/notifier.h> 15 17 16 18 #ifndef VFIO_PCI_PRIVATE_H 17 19 #define VFIO_PCI_PRIVATE_H ··· 86 84 struct mutex lock; 87 85 }; 88 86 87 + struct vfio_pci_vf_token { 88 + struct mutex lock; 89 + uuid_t uuid; 90 + int users; 91 + }; 92 + 89 93 struct vfio_pci_device { 90 94 struct pci_dev *pdev; 91 95 void __iomem *barmap[PCI_STD_NUM_BARS]; ··· 130 122 struct list_head dummy_resources_list; 131 123 struct mutex ioeventfds_lock; 132 124 struct list_head ioeventfds_list; 125 + struct vfio_pci_vf_token *vf_token; 126 + struct notifier_block nb; 133 127 }; 134 128 135 129 #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
+1 -1
drivers/vfio/platform/vfio_platform.c
··· 44 44 { 45 45 struct platform_device *pdev = (struct platform_device *) vdev->opaque; 46 46 47 - return platform_get_irq(pdev, i); 47 + return platform_get_irq_optional(pdev, i); 48 48 } 49 49 50 50 static int vfio_platform_probe(struct platform_device *pdev)
+194 -4
drivers/vfio/vfio.c
··· 875 875 static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, 876 876 char *buf) 877 877 { 878 - struct vfio_device *it, *device = NULL; 878 + struct vfio_device *it, *device = ERR_PTR(-ENODEV); 879 879 880 880 mutex_lock(&group->device_lock); 881 881 list_for_each_entry(it, &group->device_list, group_next) { 882 - if (!strcmp(dev_name(it->dev), buf)) { 882 + int ret; 883 + 884 + if (it->ops->match) { 885 + ret = it->ops->match(it->device_data, buf); 886 + if (ret < 0) { 887 + device = ERR_PTR(ret); 888 + break; 889 + } 890 + } else { 891 + ret = !strcmp(dev_name(it->dev), buf); 892 + } 893 + 894 + if (ret) { 883 895 device = it; 884 896 vfio_device_get(device); 885 897 break; ··· 1442 1430 return -EPERM; 1443 1431 1444 1432 device = vfio_device_get_from_name(group, buf); 1445 - if (!device) 1446 - return -ENODEV; 1433 + if (IS_ERR(device)) 1434 + return PTR_ERR(device); 1447 1435 1448 1436 ret = device->ops->open(device->device_data); 1449 1437 if (ret) { ··· 1732 1720 } 1733 1721 EXPORT_SYMBOL_GPL(vfio_group_get_external_user); 1734 1722 1723 + /** 1724 + * External user API, exported by symbols to be linked dynamically. 1725 + * The external user passes in a device pointer 1726 + * to verify that: 1727 + * - A VFIO group is associated with the device; 1728 + * - IOMMU is set for the group. 1729 + * If both checks passed, vfio_group_get_external_user_from_dev() 1730 + * increments the container user counter to prevent the VFIO group 1731 + * from disposal before external user exits and returns the pointer 1732 + * to the VFIO group. 1733 + * 1734 + * When the external user finishes using the VFIO group, it calls 1735 + * vfio_group_put_external_user() to release the VFIO group and 1736 + * decrement the container user counter. 1737 + * 1738 + * @dev [in] : device 1739 + * Return error PTR or pointer to VFIO group. 
1740 + */ 1741 + 1742 + struct vfio_group *vfio_group_get_external_user_from_dev(struct device *dev) 1743 + { 1744 + struct vfio_group *group; 1745 + int ret; 1746 + 1747 + group = vfio_group_get_from_dev(dev); 1748 + if (!group) 1749 + return ERR_PTR(-ENODEV); 1750 + 1751 + ret = vfio_group_add_container_user(group); 1752 + if (ret) { 1753 + vfio_group_put(group); 1754 + return ERR_PTR(ret); 1755 + } 1756 + 1757 + return group; 1758 + } 1759 + EXPORT_SYMBOL_GPL(vfio_group_get_external_user_from_dev); 1760 + 1735 1761 void vfio_group_put_external_user(struct vfio_group *group) 1736 1762 { 1737 1763 vfio_group_try_dissolve_container(group); ··· 2010 1960 return ret; 2011 1961 } 2012 1962 EXPORT_SYMBOL(vfio_unpin_pages); 1963 + 1964 + /* 1965 + * Pin a set of guest IOVA PFNs and return their associated host PFNs for a 1966 + * VFIO group. 1967 + * 1968 + * The caller needs to call vfio_group_get_external_user() or 1969 + * vfio_group_get_external_user_from_dev() prior to calling this interface, 1970 + * so as to prevent the VFIO group from disposal in the middle of the call. 1971 + * But it can keep the reference to the VFIO group for several calls into 1972 + * this interface. 1973 + * After finishing using of the VFIO group, the caller needs to release the 1974 + * VFIO group by calling vfio_group_put_external_user(). 1975 + * 1976 + * @group [in] : VFIO group 1977 + * @user_iova_pfn [in] : array of user/guest IOVA PFNs to be pinned. 1978 + * @npage [in] : count of elements in user_iova_pfn array. 1979 + * This count should not be greater than 1980 + * VFIO_PIN_PAGES_MAX_ENTRIES. 1981 + * @prot [in] : protection flags 1982 + * @phys_pfn [out] : array of host PFNs 1983 + * Return error or number of pages pinned. 
1984 + */ 1985 + int vfio_group_pin_pages(struct vfio_group *group, 1986 + unsigned long *user_iova_pfn, int npage, 1987 + int prot, unsigned long *phys_pfn) 1988 + { 1989 + struct vfio_container *container; 1990 + struct vfio_iommu_driver *driver; 1991 + int ret; 1992 + 1993 + if (!group || !user_iova_pfn || !phys_pfn || !npage) 1994 + return -EINVAL; 1995 + 1996 + if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) 1997 + return -E2BIG; 1998 + 1999 + container = group->container; 2000 + driver = container->iommu_driver; 2001 + if (likely(driver && driver->ops->pin_pages)) 2002 + ret = driver->ops->pin_pages(container->iommu_data, 2003 + user_iova_pfn, npage, 2004 + prot, phys_pfn); 2005 + else 2006 + ret = -ENOTTY; 2007 + 2008 + return ret; 2009 + } 2010 + EXPORT_SYMBOL(vfio_group_pin_pages); 2011 + 2012 + /* 2013 + * Unpin a set of guest IOVA PFNs for a VFIO group. 2014 + * 2015 + * The caller needs to call vfio_group_get_external_user() or 2016 + * vfio_group_get_external_user_from_dev() prior to calling this interface, 2017 + * so as to prevent the VFIO group from disposal in the middle of the call. 2018 + * But it can keep the reference to the VFIO group for several calls into 2019 + * this interface. 2020 + * After finishing using of the VFIO group, the caller needs to release the 2021 + * VFIO group by calling vfio_group_put_external_user(). 2022 + * 2023 + * @group [in] : vfio group 2024 + * @user_iova_pfn [in] : array of user/guest IOVA PFNs to be unpinned. 2025 + * @npage [in] : count of elements in user_iova_pfn array. 2026 + * This count should not be greater than 2027 + * VFIO_PIN_PAGES_MAX_ENTRIES. 2028 + * Return error or number of pages unpinned. 
2029 + */ 2030 + int vfio_group_unpin_pages(struct vfio_group *group, 2031 + unsigned long *user_iova_pfn, int npage) 2032 + { 2033 + struct vfio_container *container; 2034 + struct vfio_iommu_driver *driver; 2035 + int ret; 2036 + 2037 + if (!group || !user_iova_pfn || !npage) 2038 + return -EINVAL; 2039 + 2040 + if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) 2041 + return -E2BIG; 2042 + 2043 + container = group->container; 2044 + driver = container->iommu_driver; 2045 + if (likely(driver && driver->ops->unpin_pages)) 2046 + ret = driver->ops->unpin_pages(container->iommu_data, 2047 + user_iova_pfn, npage); 2048 + else 2049 + ret = -ENOTTY; 2050 + 2051 + return ret; 2052 + } 2053 + EXPORT_SYMBOL(vfio_group_unpin_pages); 2054 + 2055 + 2056 + /* 2057 + * This interface allows the CPUs to perform some sort of virtual DMA on 2058 + * behalf of the device. 2059 + * 2060 + * CPUs read/write from/into a range of IOVAs pointing to user space memory 2061 + * into/from a kernel buffer. 2062 + * 2063 + * As the read/write of user space memory is conducted via the CPUs and is 2064 + * not a real device DMA, it is not necessary to pin the user space memory. 2065 + * 2066 + * The caller needs to call vfio_group_get_external_user() or 2067 + * vfio_group_get_external_user_from_dev() prior to calling this interface, 2068 + * so as to prevent the VFIO group from disposal in the middle of the call. 2069 + * But it can keep the reference to the VFIO group for several calls into 2070 + * this interface. 2071 + * After finishing using of the VFIO group, the caller needs to release the 2072 + * VFIO group by calling vfio_group_put_external_user(). 2073 + * 2074 + * @group [in] : VFIO group 2075 + * @user_iova [in] : base IOVA of a user space buffer 2076 + * @data [in] : pointer to kernel buffer 2077 + * @len [in] : kernel buffer length 2078 + * @write : indicate read or write 2079 + * Return error code on failure or 0 on success. 
2080 + */ 2081 + int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, 2082 + void *data, size_t len, bool write) 2083 + { 2084 + struct vfio_container *container; 2085 + struct vfio_iommu_driver *driver; 2086 + int ret = 0; 2087 + 2088 + if (!group || !data || len <= 0) 2089 + return -EINVAL; 2090 + 2091 + container = group->container; 2092 + driver = container->iommu_driver; 2093 + 2094 + if (likely(driver && driver->ops->dma_rw)) 2095 + ret = driver->ops->dma_rw(container->iommu_data, 2096 + user_iova, data, len, write); 2097 + else 2098 + ret = -ENOTTY; 2099 + 2100 + return ret; 2101 + } 2102 + EXPORT_SYMBOL(vfio_dma_rw); 2013 2103 2014 2104 static int vfio_register_iommu_notifier(struct vfio_group *group, 2015 2105 unsigned long *events,
+77 -1
drivers/vfio/vfio_iommu_type1.c
··· 27 27 #include <linux/iommu.h> 28 28 #include <linux/module.h> 29 29 #include <linux/mm.h> 30 + #include <linux/mmu_context.h> 30 31 #include <linux/rbtree.h> 31 32 #include <linux/sched/signal.h> 32 33 #include <linux/sched/mm.h> ··· 1787 1786 1788 1787 if (resv_msi) { 1789 1788 ret = iommu_get_msi_cookie(domain->domain, resv_msi_base); 1790 - if (ret) 1789 + if (ret && ret != -ENODEV) 1791 1790 goto out_detach; 1792 1791 } 1793 1792 ··· 2306 2305 return blocking_notifier_chain_unregister(&iommu->notifier, nb); 2307 2306 } 2308 2307 2308 + static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, 2309 + dma_addr_t user_iova, void *data, 2310 + size_t count, bool write, 2311 + size_t *copied) 2312 + { 2313 + struct mm_struct *mm; 2314 + unsigned long vaddr; 2315 + struct vfio_dma *dma; 2316 + bool kthread = current->mm == NULL; 2317 + size_t offset; 2318 + 2319 + *copied = 0; 2320 + 2321 + dma = vfio_find_dma(iommu, user_iova, 1); 2322 + if (!dma) 2323 + return -EINVAL; 2324 + 2325 + if ((write && !(dma->prot & IOMMU_WRITE)) || 2326 + !(dma->prot & IOMMU_READ)) 2327 + return -EPERM; 2328 + 2329 + mm = get_task_mm(dma->task); 2330 + 2331 + if (!mm) 2332 + return -EPERM; 2333 + 2334 + if (kthread) 2335 + use_mm(mm); 2336 + else if (current->mm != mm) 2337 + goto out; 2338 + 2339 + offset = user_iova - dma->iova; 2340 + 2341 + if (count > dma->size - offset) 2342 + count = dma->size - offset; 2343 + 2344 + vaddr = dma->vaddr + offset; 2345 + 2346 + if (write) 2347 + *copied = __copy_to_user((void __user *)vaddr, data, 2348 + count) ? 0 : count; 2349 + else 2350 + *copied = __copy_from_user(data, (void __user *)vaddr, 2351 + count) ? 0 : count; 2352 + if (kthread) 2353 + unuse_mm(mm); 2354 + out: 2355 + mmput(mm); 2356 + return *copied ? 
0 : -EFAULT; 2357 + } 2358 + 2359 + static int vfio_iommu_type1_dma_rw(void *iommu_data, dma_addr_t user_iova, 2360 + void *data, size_t count, bool write) 2361 + { 2362 + struct vfio_iommu *iommu = iommu_data; 2363 + int ret = 0; 2364 + size_t done; 2365 + 2366 + mutex_lock(&iommu->lock); 2367 + while (count > 0) { 2368 + ret = vfio_iommu_type1_dma_rw_chunk(iommu, user_iova, data, 2369 + count, write, &done); 2370 + if (ret) 2371 + break; 2372 + 2373 + count -= done; 2374 + data += done; 2375 + user_iova += done; 2376 + } 2377 + 2378 + mutex_unlock(&iommu->lock); 2379 + return ret; 2380 + } 2381 + 2309 2382 static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { 2310 2383 .name = "vfio-iommu-type1", 2311 2384 .owner = THIS_MODULE, ··· 2392 2317 .unpin_pages = vfio_iommu_type1_unpin_pages, 2393 2318 .register_notifier = vfio_iommu_type1_register_notifier, 2394 2319 .unregister_notifier = vfio_iommu_type1_unregister_notifier, 2320 + .dma_rw = vfio_iommu_type1_dma_rw, 2395 2321 }; 2396 2322 2397 2323 static int __init vfio_iommu_type1_init(void)
+17
include/linux/vfio.h
··· 26 26 * operations documented below 27 27 * @mmap: Perform mmap(2) on a region of the device file descriptor 28 28 * @request: Request for the bus driver to release the device 29 + * @match: Optional device name match callback (return: 0 for no-match, >0 for 30 + * match, -errno for abort (ex. match with insufficient or incorrect 31 + * additional args)) 29 32 */ 30 33 struct vfio_device_ops { 31 34 char *name; ··· 42 39 unsigned long arg); 43 40 int (*mmap)(void *device_data, struct vm_area_struct *vma); 44 41 void (*request)(void *device_data, unsigned int count); 42 + int (*match)(void *device_data, char *buf); 45 43 }; 46 44 47 45 extern struct iommu_group *vfio_iommu_group_get(struct device *dev); ··· 86 82 struct notifier_block *nb); 87 83 int (*unregister_notifier)(void *iommu_data, 88 84 struct notifier_block *nb); 85 + int (*dma_rw)(void *iommu_data, dma_addr_t user_iova, 86 + void *data, size_t count, bool write); 89 87 }; 90 88 91 89 extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); ··· 100 94 */ 101 95 extern struct vfio_group *vfio_group_get_external_user(struct file *filep); 102 96 extern void vfio_group_put_external_user(struct vfio_group *group); 97 + extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device 98 + *dev); 103 99 extern bool vfio_external_group_match_file(struct vfio_group *group, 104 100 struct file *filep); 105 101 extern int vfio_external_user_iommu_id(struct vfio_group *group); ··· 114 106 int npage, int prot, unsigned long *phys_pfn); 115 107 extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, 116 108 int npage); 109 + 110 + extern int vfio_group_pin_pages(struct vfio_group *group, 111 + unsigned long *user_iova_pfn, int npage, 112 + int prot, unsigned long *phys_pfn); 113 + extern int vfio_group_unpin_pages(struct vfio_group *group, 114 + unsigned long *user_iova_pfn, int npage); 115 + 116 + extern int vfio_dma_rw(struct vfio_group *group, dma_addr_t 
user_iova, 117 + void *data, size_t len, bool write); 117 118 118 119 /* each type has independent events */ 119 120 enum vfio_notify_type {
+37
include/uapi/linux/vfio.h
··· 707 707 708 708 #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) 709 709 710 + /** 711 + * VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17, 712 + * struct vfio_device_feature) 713 + * 714 + * Get, set, or probe feature data of the device. The feature is selected 715 + * using the FEATURE_MASK portion of the flags field. Support for a feature 716 + * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe 717 + * may optionally include the GET and/or SET bits to determine read vs write 718 + * access of the feature respectively. Probing a feature will return success 719 + * if the feature is supported and all of the optionally indicated GET/SET 720 + * methods are supported. The format of the data portion of the structure is 721 + * specific to the given feature. The data portion is not required for 722 + * probing. GET and SET are mutually exclusive, except for use with PROBE. 723 + * 724 + * Return 0 on success, -errno on failure. 725 + */ 726 + struct vfio_device_feature { 727 + __u32 argsz; 728 + __u32 flags; 729 + #define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */ 730 + #define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */ 731 + #define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */ 732 + #define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */ 733 + __u8 data[]; 734 + }; 735 + 736 + #define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) 737 + 738 + /* 739 + * Provide support for setting a PCI VF Token, which is used as a shared 740 + * secret between PF and VF drivers. This feature may only be set on a 741 + * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing 742 + * open VFs. Data provided when setting this feature is a 16-byte array 743 + * (__u8 b[16]), representing a UUID. 744 + */ 745 + #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) 746 + 710 747 /* -------- API for Type1 VFIO IOMMU -------- */ 711 748 712 749 /**