Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfio-v6.13-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

- Constify an unmodified structure used in linking vfio and kvm
(Christophe JAILLET)

- Add ID for an additional hardware SKU supported by the nvgrace-gpu
vfio-pci variant driver (Ankit Agrawal)

- Fix an incorrect signed cast in the QAT vfio-pci variant driver that
defeated the check_add_overflow() test, though the overflow was still
caught by later tests (Giovanni Cabiddu)

- Additional debugfs attributes exposed in hisi_acc vfio-pci variant
driver for migration debugging (Longfang Liu)

- Migration support is added to the virtio vfio-pci variant driver,
becoming the primary feature of the driver while retaining emulation
of virtio legacy support as a secondary option (Yishai Hadas)

- Fixes to a few unwind flows in the mlx5 vfio-pci driver discovered
through reviews of the virtio variant driver (Yishai Hadas)

- Fix an unlikely issue where a PCI device exposed to userspace with an
unknown capability at the base of the extended capability chain can
overflow an array index (Avihai Horon)

* tag 'vfio-v6.13-rc1' of https://github.com/awilliam/linux-vfio:
vfio/pci: Properly hide first-in-list PCIe extended capability
vfio/mlx5: Fix unwind flows in mlx5vf_pci_save/resume_device_data()
vfio/mlx5: Fix an unwind issue in mlx5vf_add_migration_pages()
vfio/virtio: Enable live migration once VIRTIO_PCI was configured
vfio/virtio: Add PRE_COPY support for live migration
vfio/virtio: Add support for the basic live migration functionality
virtio-pci: Introduce APIs to execute device parts admin commands
virtio: Manage device and driver capabilities via the admin commands
virtio: Extend the admin command to include the result size
virtio_pci: Introduce device parts access commands
Documentation: add debugfs description for hisi migration
hisi_acc_vfio_pci: register debugfs for hisilicon migration driver
hisi_acc_vfio_pci: create subfunction for data reading
hisi_acc_vfio_pci: extract public functions for container_of
vfio/qat: fix overflow check in qat_vf_resume_write()
vfio/nvgrace-gpu: Add a new GH200 SKU to the devid table
kvm/vfio: Constify struct kvm_device_ops

+2920 -475
+25
Documentation/ABI/testing/debugfs-hisi-migration
··· 1 + What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/dev_data 2 + Date: Jan 2025 3 + KernelVersion: 6.13 4 + Contact: Longfang Liu <liulongfang@huawei.com> 5 + Description: Read the configuration data and some status data 6 + required for device live migration. These data include device 7 + status data, queue configuration data, some task configuration 8 + data and device attribute data. The output format of the data 9 + is defined by the live migration driver. 10 + 11 + What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/migf_data 12 + Date: Jan 2025 13 + KernelVersion: 6.13 14 + Contact: Longfang Liu <liulongfang@huawei.com> 15 + Description: Read the data from the last completed live migration. 16 + This data includes the same device status data as in "dev_data". 17 + The migf_data is the dev_data that is migrated. 18 + 19 + What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/cmd_state 20 + Date: Jan 2025 21 + KernelVersion: 6.13 22 + Contact: Longfang Liu <liulongfang@huawei.com> 23 + Description: Used to obtain the device command sending and receiving 24 + channel status. Returns failure or success logs based on the 25 + results.
+235 -31
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
··· 486 486 return 0; 487 487 } 488 488 489 - static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev, 490 - struct hisi_acc_vf_migration_file *migf) 489 + static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data) 491 490 { 492 - struct acc_vf_data *vf_data = &migf->vf_data; 493 - struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; 494 491 struct device *dev = &vf_qm->pdev->dev; 495 492 int ret; 496 - 497 - if (unlikely(qm_wait_dev_not_ready(vf_qm))) { 498 - /* Update state and return with match data */ 499 - vf_data->vf_qm_state = QM_NOT_READY; 500 - hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; 501 - migf->total_length = QM_MATCH_SIZE; 502 - return 0; 503 - } 504 - 505 - vf_data->vf_qm_state = QM_READY; 506 - hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; 507 - 508 - ret = vf_qm_cache_wb(vf_qm); 509 - if (ret) { 510 - dev_err(dev, "failed to writeback QM Cache!\n"); 511 - return ret; 512 - } 513 493 514 494 ret = qm_get_regs(vf_qm, vf_data); 515 495 if (ret) ··· 515 535 dev_err(dev, "failed to read CQC addr!\n"); 516 536 return -EINVAL; 517 537 } 538 + 539 + return 0; 540 + } 541 + 542 + static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev, 543 + struct hisi_acc_vf_migration_file *migf) 544 + { 545 + struct acc_vf_data *vf_data = &migf->vf_data; 546 + struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; 547 + struct device *dev = &vf_qm->pdev->dev; 548 + int ret; 549 + 550 + if (unlikely(qm_wait_dev_not_ready(vf_qm))) { 551 + /* Update state and return with match data */ 552 + vf_data->vf_qm_state = QM_NOT_READY; 553 + hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; 554 + migf->total_length = QM_MATCH_SIZE; 555 + return 0; 556 + } 557 + 558 + vf_data->vf_qm_state = QM_READY; 559 + hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; 560 + 561 + ret = vf_qm_cache_wb(vf_qm); 562 + if (ret) { 563 + dev_err(dev, "failed to writeback QM Cache!\n"); 564 + return ret; 565 + } 566 + 567 + ret = 
vf_qm_read_data(vf_qm, vf_data); 568 + if (ret) 569 + return -EINVAL; 518 570 519 571 migf->total_length = sizeof(struct acc_vf_data); 520 572 return 0; ··· 627 615 mutex_unlock(&migf->lock); 628 616 } 629 617 618 + static void 619 + hisi_acc_debug_migf_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev, 620 + struct hisi_acc_vf_migration_file *src_migf) 621 + { 622 + struct hisi_acc_vf_migration_file *dst_migf = hisi_acc_vdev->debug_migf; 623 + 624 + if (!dst_migf) 625 + return; 626 + 627 + dst_migf->total_length = src_migf->total_length; 628 + memcpy(&dst_migf->vf_data, &src_migf->vf_data, 629 + sizeof(struct acc_vf_data)); 630 + } 631 + 630 632 static void hisi_acc_vf_disable_fds(struct hisi_acc_vf_core_device *hisi_acc_vdev) 631 633 { 632 634 if (hisi_acc_vdev->resuming_migf) { 635 + hisi_acc_debug_migf_copy(hisi_acc_vdev, hisi_acc_vdev->resuming_migf); 633 636 hisi_acc_vf_disable_fd(hisi_acc_vdev->resuming_migf); 634 637 fput(hisi_acc_vdev->resuming_migf->filp); 635 638 hisi_acc_vdev->resuming_migf = NULL; 636 639 } 637 640 638 641 if (hisi_acc_vdev->saving_migf) { 642 + hisi_acc_debug_migf_copy(hisi_acc_vdev, hisi_acc_vdev->saving_migf); 639 643 hisi_acc_vf_disable_fd(hisi_acc_vdev->saving_migf); 640 644 fput(hisi_acc_vdev->saving_migf->filp); 641 645 hisi_acc_vdev->saving_migf = NULL; 642 646 } 647 + } 648 + 649 + static struct hisi_acc_vf_core_device *hisi_acc_get_vf_dev(struct vfio_device *vdev) 650 + { 651 + return container_of(vdev, struct hisi_acc_vf_core_device, 652 + core_device.vdev); 643 653 } 644 654 645 655 static void hisi_acc_vf_reset(struct hisi_acc_vf_core_device *hisi_acc_vdev) ··· 1065 1031 hisi_acc_vfio_pci_set_device_state(struct vfio_device *vdev, 1066 1032 enum vfio_device_mig_state new_state) 1067 1033 { 1068 - struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(vdev, 1069 - struct hisi_acc_vf_core_device, core_device.vdev); 1034 + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev); 1070 1035 enum 
vfio_device_mig_state next_state; 1071 1036 struct file *res = NULL; 1072 1037 int ret; ··· 1106 1073 hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev, 1107 1074 enum vfio_device_mig_state *curr_state) 1108 1075 { 1109 - struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(vdev, 1110 - struct hisi_acc_vf_core_device, core_device.vdev); 1076 + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev); 1111 1077 1112 1078 mutex_lock(&hisi_acc_vdev->state_mutex); 1113 1079 *curr_state = hisi_acc_vdev->mig_state; ··· 1308 1276 return vfio_pci_core_ioctl(core_vdev, cmd, arg); 1309 1277 } 1310 1278 1279 + static int hisi_acc_vf_debug_check(struct seq_file *seq, struct vfio_device *vdev) 1280 + { 1281 + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev); 1282 + struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; 1283 + int ret; 1284 + 1285 + lockdep_assert_held(&hisi_acc_vdev->open_mutex); 1286 + /* 1287 + * When the device is not opened, the io_base is not mapped. 1288 + * The driver cannot perform device read and write operations. 
1289 + */ 1290 + if (!hisi_acc_vdev->dev_opened) { 1291 + seq_puts(seq, "device not opened!\n"); 1292 + return -EINVAL; 1293 + } 1294 + 1295 + ret = qm_wait_dev_not_ready(vf_qm); 1296 + if (ret) { 1297 + seq_puts(seq, "VF device not ready!\n"); 1298 + return -EBUSY; 1299 + } 1300 + 1301 + return 0; 1302 + } 1303 + 1304 + static int hisi_acc_vf_debug_cmd(struct seq_file *seq, void *data) 1305 + { 1306 + struct device *vf_dev = seq->private; 1307 + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev); 1308 + struct vfio_device *vdev = &core_device->vdev; 1309 + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev); 1310 + struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; 1311 + u64 value; 1312 + int ret; 1313 + 1314 + mutex_lock(&hisi_acc_vdev->open_mutex); 1315 + ret = hisi_acc_vf_debug_check(seq, vdev); 1316 + if (ret) { 1317 + mutex_unlock(&hisi_acc_vdev->open_mutex); 1318 + return ret; 1319 + } 1320 + 1321 + value = readl(vf_qm->io_base + QM_MB_CMD_SEND_BASE); 1322 + if (value == QM_MB_CMD_NOT_READY) { 1323 + mutex_unlock(&hisi_acc_vdev->open_mutex); 1324 + seq_puts(seq, "mailbox cmd channel not ready!\n"); 1325 + return -EINVAL; 1326 + } 1327 + mutex_unlock(&hisi_acc_vdev->open_mutex); 1328 + seq_puts(seq, "mailbox cmd channel ready!\n"); 1329 + 1330 + return 0; 1331 + } 1332 + 1333 + static int hisi_acc_vf_dev_read(struct seq_file *seq, void *data) 1334 + { 1335 + struct device *vf_dev = seq->private; 1336 + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev); 1337 + struct vfio_device *vdev = &core_device->vdev; 1338 + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev); 1339 + size_t vf_data_sz = offsetofend(struct acc_vf_data, padding); 1340 + struct acc_vf_data *vf_data; 1341 + int ret; 1342 + 1343 + mutex_lock(&hisi_acc_vdev->open_mutex); 1344 + ret = hisi_acc_vf_debug_check(seq, vdev); 1345 + if (ret) { 1346 + mutex_unlock(&hisi_acc_vdev->open_mutex); 1347 + return ret; 1348 + } 1349 + 
1350 + mutex_lock(&hisi_acc_vdev->state_mutex); 1351 + vf_data = kzalloc(sizeof(*vf_data), GFP_KERNEL); 1352 + if (!vf_data) { 1353 + ret = -ENOMEM; 1354 + goto mutex_release; 1355 + } 1356 + 1357 + vf_data->vf_qm_state = hisi_acc_vdev->vf_qm_state; 1358 + ret = vf_qm_read_data(&hisi_acc_vdev->vf_qm, vf_data); 1359 + if (ret) 1360 + goto migf_err; 1361 + 1362 + seq_hex_dump(seq, "Dev Data:", DUMP_PREFIX_OFFSET, 16, 1, 1363 + (const void *)vf_data, vf_data_sz, false); 1364 + 1365 + seq_printf(seq, 1366 + "guest driver load: %u\n" 1367 + "data size: %lu\n", 1368 + hisi_acc_vdev->vf_qm_state, 1369 + sizeof(struct acc_vf_data)); 1370 + 1371 + migf_err: 1372 + kfree(vf_data); 1373 + mutex_release: 1374 + mutex_unlock(&hisi_acc_vdev->state_mutex); 1375 + mutex_unlock(&hisi_acc_vdev->open_mutex); 1376 + 1377 + return ret; 1378 + } 1379 + 1380 + static int hisi_acc_vf_migf_read(struct seq_file *seq, void *data) 1381 + { 1382 + struct device *vf_dev = seq->private; 1383 + struct vfio_pci_core_device *core_device = dev_get_drvdata(vf_dev); 1384 + struct vfio_device *vdev = &core_device->vdev; 1385 + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(vdev); 1386 + size_t vf_data_sz = offsetofend(struct acc_vf_data, padding); 1387 + struct hisi_acc_vf_migration_file *debug_migf = hisi_acc_vdev->debug_migf; 1388 + 1389 + /* Check whether the live migration operation has been performed */ 1390 + if (debug_migf->total_length < QM_MATCH_SIZE) { 1391 + seq_puts(seq, "device not migrated!\n"); 1392 + return -EAGAIN; 1393 + } 1394 + 1395 + seq_hex_dump(seq, "Mig Data:", DUMP_PREFIX_OFFSET, 16, 1, 1396 + (const void *)&debug_migf->vf_data, vf_data_sz, false); 1397 + seq_printf(seq, "migrate data length: %lu\n", debug_migf->total_length); 1398 + 1399 + return 0; 1400 + } 1401 + 1311 1402 static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev) 1312 1403 { 1313 - struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev, 1314 - struct 
hisi_acc_vf_core_device, core_device.vdev); 1404 + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev); 1315 1405 struct vfio_pci_core_device *vdev = &hisi_acc_vdev->core_device; 1316 1406 int ret; 1317 1407 ··· 1442 1288 return ret; 1443 1289 1444 1290 if (core_vdev->mig_ops) { 1291 + mutex_lock(&hisi_acc_vdev->open_mutex); 1445 1292 ret = hisi_acc_vf_qm_init(hisi_acc_vdev); 1446 1293 if (ret) { 1294 + mutex_unlock(&hisi_acc_vdev->open_mutex); 1447 1295 vfio_pci_core_disable(vdev); 1448 1296 return ret; 1449 1297 } 1450 1298 hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING; 1299 + hisi_acc_vdev->dev_opened = true; 1300 + mutex_unlock(&hisi_acc_vdev->open_mutex); 1451 1301 } 1452 1302 1453 1303 vfio_pci_core_finish_enable(vdev); ··· 1460 1302 1461 1303 static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev) 1462 1304 { 1463 - struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev, 1464 - struct hisi_acc_vf_core_device, core_device.vdev); 1305 + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev); 1465 1306 struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; 1466 1307 1308 + mutex_lock(&hisi_acc_vdev->open_mutex); 1309 + hisi_acc_vdev->dev_opened = false; 1467 1310 iounmap(vf_qm->io_base); 1311 + mutex_unlock(&hisi_acc_vdev->open_mutex); 1468 1312 vfio_pci_core_close_device(core_vdev); 1469 1313 } 1470 1314 ··· 1478 1318 1479 1319 static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev) 1480 1320 { 1481 - struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev, 1482 - struct hisi_acc_vf_core_device, core_device.vdev); 1321 + struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev); 1483 1322 struct pci_dev *pdev = to_pci_dev(core_vdev->dev); 1484 1323 struct hisi_qm *pf_qm = hisi_acc_get_pf_qm(pdev); 1485 1324 ··· 1486 1327 hisi_acc_vdev->pf_qm = pf_qm; 1487 1328 hisi_acc_vdev->vf_dev = pdev; 1488 1329 
mutex_init(&hisi_acc_vdev->state_mutex); 1330 + mutex_init(&hisi_acc_vdev->open_mutex); 1489 1331 1490 1332 core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY; 1491 1333 core_vdev->mig_ops = &hisi_acc_vfio_pci_migrn_state_ops; ··· 1532 1372 .detach_ioas = vfio_iommufd_physical_detach_ioas, 1533 1373 }; 1534 1374 1375 + static void hisi_acc_vfio_debug_init(struct hisi_acc_vf_core_device *hisi_acc_vdev) 1376 + { 1377 + struct vfio_device *vdev = &hisi_acc_vdev->core_device.vdev; 1378 + struct hisi_acc_vf_migration_file *migf; 1379 + struct dentry *vfio_dev_migration; 1380 + struct dentry *vfio_hisi_acc; 1381 + struct device *dev = vdev->dev; 1382 + 1383 + if (!debugfs_initialized() || 1384 + !IS_ENABLED(CONFIG_VFIO_DEBUGFS)) 1385 + return; 1386 + 1387 + if (vdev->ops != &hisi_acc_vfio_pci_migrn_ops) 1388 + return; 1389 + 1390 + vfio_dev_migration = debugfs_lookup("migration", vdev->debug_root); 1391 + if (!vfio_dev_migration) { 1392 + dev_err(dev, "failed to lookup migration debugfs file!\n"); 1393 + return; 1394 + } 1395 + 1396 + migf = kzalloc(sizeof(*migf), GFP_KERNEL); 1397 + if (!migf) 1398 + return; 1399 + hisi_acc_vdev->debug_migf = migf; 1400 + 1401 + vfio_hisi_acc = debugfs_create_dir("hisi_acc", vfio_dev_migration); 1402 + debugfs_create_devm_seqfile(dev, "dev_data", vfio_hisi_acc, 1403 + hisi_acc_vf_dev_read); 1404 + debugfs_create_devm_seqfile(dev, "migf_data", vfio_hisi_acc, 1405 + hisi_acc_vf_migf_read); 1406 + debugfs_create_devm_seqfile(dev, "cmd_state", vfio_hisi_acc, 1407 + hisi_acc_vf_debug_cmd); 1408 + } 1409 + 1410 + static void hisi_acc_vf_debugfs_exit(struct hisi_acc_vf_core_device *hisi_acc_vdev) 1411 + { 1412 + kfree(hisi_acc_vdev->debug_migf); 1413 + hisi_acc_vdev->debug_migf = NULL; 1414 + } 1415 + 1535 1416 static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 1536 1417 { 1537 1418 struct hisi_acc_vf_core_device *hisi_acc_vdev; ··· 1599 1398 ret = 
vfio_pci_core_register_device(&hisi_acc_vdev->core_device); 1600 1399 if (ret) 1601 1400 goto out_put_vdev; 1401 + 1402 + hisi_acc_vfio_debug_init(hisi_acc_vdev); 1602 1403 return 0; 1603 1404 1604 1405 out_put_vdev: ··· 1613 1410 struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev); 1614 1411 1615 1412 vfio_pci_core_unregister_device(&hisi_acc_vdev->core_device); 1413 + hisi_acc_vf_debugfs_exit(hisi_acc_vdev); 1616 1414 vfio_put_device(&hisi_acc_vdev->core_device.vdev); 1617 1415 } 1618 1416
+19
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
··· 32 32 #define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0) 33 33 #define QM_SQC_VFT_NUM_SHIFT_V2 45 34 34 #define QM_SQC_VFT_NUM_MASK_V2 GENMASK(9, 0) 35 + #define QM_MB_CMD_NOT_READY 0xffffffff 35 36 36 37 /* RW regs */ 37 38 #define QM_REGS_MAX_LEN 7 ··· 100 99 struct hisi_acc_vf_core_device { 101 100 struct vfio_pci_core_device core_device; 102 101 u8 match_done; 102 + /* 103 + * io_base is only valid when dev_opened is true, 104 + * which is protected by open_mutex. 105 + */ 106 + bool dev_opened; 107 + /* Ensure the accuracy of dev_opened operation */ 108 + struct mutex open_mutex; 103 109 104 110 /* For migration state */ 105 111 struct mutex state_mutex; ··· 115 107 struct pci_dev *vf_dev; 116 108 struct hisi_qm *pf_qm; 117 109 struct hisi_qm vf_qm; 110 + /* 111 + * vf_qm_state represents the QM_VF_STATE register value. 112 + * It is set by Guest driver for the ACC VF dev indicating 113 + * the driver has loaded and configured the dev correctly. 114 + */ 118 115 u32 vf_qm_state; 119 116 int vf_id; 120 117 struct hisi_acc_vf_migration_file *resuming_migf; 121 118 struct hisi_acc_vf_migration_file *saving_migf; 119 + 120 + /* 121 + * It holds migration data corresponding to the last migration 122 + * and is used by the debugfs interface to report it. 123 + */ 124 + struct hisi_acc_vf_migration_file *debug_migf; 122 125 }; 123 126 #endif /* HISI_ACC_VFIO_PCI_H */
+5 -1
drivers/vfio/pci/mlx5/cmd.c
··· 423 423 unsigned long filled; 424 424 unsigned int to_fill; 425 425 int ret; 426 + int i; 426 427 427 428 to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); 428 429 page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); ··· 444 443 GFP_KERNEL_ACCOUNT); 445 444 446 445 if (ret) 447 - goto err; 446 + goto err_append; 448 447 buf->allocated_length += filled * PAGE_SIZE; 449 448 /* clean input for another bulk allocation */ 450 449 memset(page_list, 0, filled * sizeof(*page_list)); ··· 455 454 kvfree(page_list); 456 455 return 0; 457 456 457 + err_append: 458 + for (i = filled - 1; i >= 0; i--) 459 + __free_page(page_list[i]); 458 460 err: 459 461 kvfree(page_list); 460 462 return ret;
+17 -18
drivers/vfio/pci/mlx5/main.c
··· 640 640 O_RDONLY); 641 641 if (IS_ERR(migf->filp)) { 642 642 ret = PTR_ERR(migf->filp); 643 - goto end; 643 + kfree(migf); 644 + return ERR_PTR(ret); 644 645 } 645 646 646 647 migf->mvdev = mvdev; 647 - ret = mlx5vf_cmd_alloc_pd(migf); 648 - if (ret) 649 - goto out_free; 650 - 651 648 stream_open(migf->filp->f_inode, migf->filp); 652 649 mutex_init(&migf->lock); 653 650 init_waitqueue_head(&migf->poll_wait); ··· 660 663 INIT_LIST_HEAD(&migf->buf_list); 661 664 INIT_LIST_HEAD(&migf->avail_list); 662 665 spin_lock_init(&migf->list_lock); 666 + 667 + ret = mlx5vf_cmd_alloc_pd(migf); 668 + if (ret) 669 + goto out; 670 + 663 671 ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, &full_size, 0); 664 672 if (ret) 665 673 goto out_pd; ··· 694 692 mlx5vf_free_data_buffer(buf); 695 693 out_pd: 696 694 mlx5fv_cmd_clean_migf_resources(migf); 697 - out_free: 695 + out: 698 696 fput(migf->filp); 699 - end: 700 - kfree(migf); 701 697 return ERR_PTR(ret); 702 698 } 703 699 ··· 1016 1016 O_WRONLY); 1017 1017 if (IS_ERR(migf->filp)) { 1018 1018 ret = PTR_ERR(migf->filp); 1019 - goto end; 1019 + kfree(migf); 1020 + return ERR_PTR(ret); 1020 1021 } 1021 1022 1023 + stream_open(migf->filp->f_inode, migf->filp); 1024 + mutex_init(&migf->lock); 1025 + INIT_LIST_HEAD(&migf->buf_list); 1026 + INIT_LIST_HEAD(&migf->avail_list); 1027 + spin_lock_init(&migf->list_lock); 1022 1028 migf->mvdev = mvdev; 1023 1029 ret = mlx5vf_cmd_alloc_pd(migf); 1024 1030 if (ret) 1025 - goto out_free; 1031 + goto out; 1026 1032 1027 1033 buf = mlx5vf_alloc_data_buffer(migf, 0, DMA_TO_DEVICE); 1028 1034 if (IS_ERR(buf)) { ··· 1047 1041 migf->buf_header[0] = buf; 1048 1042 migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; 1049 1043 1050 - stream_open(migf->filp->f_inode, migf->filp); 1051 - mutex_init(&migf->lock); 1052 - INIT_LIST_HEAD(&migf->buf_list); 1053 - INIT_LIST_HEAD(&migf->avail_list); 1054 - spin_lock_init(&migf->list_lock); 1055 1044 return migf; 1056 1045 out_buf: 1057 1046 
mlx5vf_free_data_buffer(migf->buf[0]); 1058 1047 out_pd: 1059 1048 mlx5vf_cmd_dealloc_pd(migf); 1060 - out_free: 1049 + out: 1061 1050 fput(migf->filp); 1062 - end: 1063 - kfree(migf); 1064 1051 return ERR_PTR(ret); 1065 1052 } 1066 1053
+2
drivers/vfio/pci/nvgrace-gpu/main.c
··· 866 866 { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2342) }, 867 867 /* GH200 480GB */ 868 868 { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2345) }, 869 + /* GH200 SKU */ 870 + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2348) }, 869 871 {} 870 872 }; 871 873
+1 -1
drivers/vfio/pci/qat/main.c
··· 304 304 offs = &filp->f_pos; 305 305 306 306 if (*offs < 0 || 307 - check_add_overflow((loff_t)len, *offs, &end)) 307 + check_add_overflow(len, *offs, &end)) 308 308 return -EOVERFLOW; 309 309 310 310 if (end > mig_dev->state_size)
+14 -2
drivers/vfio/pci/vfio_pci_config.c
··· 313 313 return count; 314 314 } 315 315 316 + static struct perm_bits direct_ro_perms = { 317 + .readfn = vfio_direct_config_read, 318 + }; 319 + 316 320 /* Default capability regions to read-only, no-virtualization */ 317 321 static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = { 318 322 [0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read } ··· 1901 1897 cap_start = *ppos; 1902 1898 } else { 1903 1899 if (*ppos >= PCI_CFG_SPACE_SIZE) { 1904 - WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX); 1900 + /* 1901 + * We can get a cap_id that exceeds PCI_EXT_CAP_ID_MAX 1902 + * if we're hiding an unknown capability at the start 1903 + * of the extended capability list. Use default, ro 1904 + * access, which will virtualize the id and next values. 1905 + */ 1906 + if (cap_id > PCI_EXT_CAP_ID_MAX) 1907 + perm = &direct_ro_perms; 1908 + else 1909 + perm = &ecap_perms[cap_id]; 1905 1910 1906 - perm = &ecap_perms[cap_id]; 1907 1911 cap_start = vfio_find_cap_start(vdev, *ppos); 1908 1912 } else { 1909 1913 WARN_ON(cap_id > PCI_CAP_ID_MAX);
+28 -12
drivers/vfio/pci/virtio/Kconfig
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 config VIRTIO_VFIO_PCI 3 - tristate "VFIO support for VIRTIO NET PCI devices" 4 - depends on VIRTIO_PCI && VIRTIO_PCI_ADMIN_LEGACY 5 - select VFIO_PCI_CORE 6 - help 7 - This provides support for exposing VIRTIO NET VF devices which support 8 - legacy IO access, using the VFIO framework that can work with a legacy 9 - virtio driver in the guest. 10 - Based on PCIe spec, VFs do not support I/O Space. 11 - As of that this driver emulates I/O BAR in software to let a VF be 12 - seen as a transitional device by its users and let it work with 13 - a legacy driver. 3 + tristate "VFIO support for VIRTIO NET PCI VF devices" 4 + depends on VIRTIO_PCI 5 + select VFIO_PCI_CORE 6 + help 7 + This provides migration support for VIRTIO NET PCI VF devices 8 + using the VFIO framework. Migration support requires the 9 + SR-IOV PF device to support specific VIRTIO extensions, 10 + otherwise this driver provides no additional functionality 11 + beyond vfio-pci. 14 12 15 - If you don't know what to do here, say N. 13 + Migration support in this driver relies on dirty page tracking 14 + provided by the IOMMU hardware and exposed through IOMMUFD, any 15 + other use cases are dis-recommended. 16 + 17 + If you don't know what to do here, say N. 18 + 19 + config VIRTIO_VFIO_PCI_ADMIN_LEGACY 20 + bool "Legacy I/O support for VIRTIO NET PCI VF devices" 21 + depends on VIRTIO_VFIO_PCI && VIRTIO_PCI_ADMIN_LEGACY 22 + default y 23 + help 24 + This extends the virtio-vfio-pci driver to support legacy I/O 25 + access, allowing use of legacy virtio drivers with VIRTIO NET 26 + PCI VF devices. Legacy I/O support requires the SR-IOV PF 27 + device to support and enable specific VIRTIO extensions, 28 + otherwise this driver provides no additional functionality 29 + beyond vfio-pci. 30 + 31 + If you don't know what to do here, say N.
+2 -1
drivers/vfio/pci/virtio/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio-vfio-pci.o 3 - virtio-vfio-pci-y := main.o 3 + virtio-vfio-pci-y := main.o migrate.o 4 + virtio-vfio-pci-$(CONFIG_VIRTIO_VFIO_PCI_ADMIN_LEGACY) += legacy_io.o
+127
drivers/vfio/pci/virtio/common.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #ifndef VIRTIO_VFIO_COMMON_H 4 + #define VIRTIO_VFIO_COMMON_H 5 + 6 + #include <linux/kernel.h> 7 + #include <linux/virtio.h> 8 + #include <linux/vfio_pci_core.h> 9 + #include <linux/virtio_pci.h> 10 + 11 + enum virtiovf_migf_state { 12 + VIRTIOVF_MIGF_STATE_ERROR = 1, 13 + VIRTIOVF_MIGF_STATE_PRECOPY = 2, 14 + VIRTIOVF_MIGF_STATE_COMPLETE = 3, 15 + }; 16 + 17 + enum virtiovf_load_state { 18 + VIRTIOVF_LOAD_STATE_READ_HEADER, 19 + VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA, 20 + VIRTIOVF_LOAD_STATE_READ_HEADER_DATA, 21 + VIRTIOVF_LOAD_STATE_PREP_CHUNK, 22 + VIRTIOVF_LOAD_STATE_READ_CHUNK, 23 + VIRTIOVF_LOAD_STATE_LOAD_CHUNK, 24 + }; 25 + 26 + struct virtiovf_data_buffer { 27 + struct sg_append_table table; 28 + loff_t start_pos; 29 + u64 length; 30 + u64 allocated_length; 31 + struct list_head buf_elm; 32 + u8 include_header_object:1; 33 + struct virtiovf_migration_file *migf; 34 + /* Optimize virtiovf_get_migration_page() for sequential access */ 35 + struct scatterlist *last_offset_sg; 36 + unsigned int sg_last_entry; 37 + unsigned long last_offset; 38 + }; 39 + 40 + enum virtiovf_migf_header_flags { 41 + VIRTIOVF_MIGF_HEADER_FLAGS_TAG_MANDATORY = 0, 42 + VIRTIOVF_MIGF_HEADER_FLAGS_TAG_OPTIONAL = 1 << 0, 43 + }; 44 + 45 + enum virtiovf_migf_header_tag { 46 + VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA = 0, 47 + }; 48 + 49 + struct virtiovf_migration_header { 50 + __le64 record_size; 51 + /* For future use in case we may need to change the kernel protocol */ 52 + __le32 flags; /* Use virtiovf_migf_header_flags */ 53 + __le32 tag; /* Use virtiovf_migf_header_tag */ 54 + __u8 data[]; /* Its size is given in the record_size */ 55 + }; 56 + 57 + struct virtiovf_migration_file { 58 + struct file *filp; 59 + /* synchronize access to the file state */ 60 + struct mutex lock; 61 + loff_t max_pos; 62 + u64 pre_copy_initial_bytes; 63 + struct ratelimit_state pre_copy_rl_state; 64 + u64 record_size; 65 + u32 record_tag; 66 + u8 
has_obj_id:1; 67 + u32 obj_id; 68 + enum virtiovf_migf_state state; 69 + enum virtiovf_load_state load_state; 70 + /* synchronize access to the lists */ 71 + spinlock_t list_lock; 72 + struct list_head buf_list; 73 + struct list_head avail_list; 74 + struct virtiovf_data_buffer *buf; 75 + struct virtiovf_data_buffer *buf_header; 76 + struct virtiovf_pci_core_device *virtvdev; 77 + }; 78 + 79 + struct virtiovf_pci_core_device { 80 + struct vfio_pci_core_device core_device; 81 + #ifdef CONFIG_VIRTIO_VFIO_PCI_ADMIN_LEGACY 82 + u8 *bar0_virtual_buf; 83 + /* synchronize access to the virtual buf */ 84 + struct mutex bar_mutex; 85 + void __iomem *notify_addr; 86 + u64 notify_offset; 87 + __le32 pci_base_addr_0; 88 + __le16 pci_cmd; 89 + u8 bar0_virtual_buf_size; 90 + u8 notify_bar; 91 + #endif 92 + 93 + /* LM related */ 94 + u8 migrate_cap:1; 95 + u8 deferred_reset:1; 96 + /* protect migration state */ 97 + struct mutex state_mutex; 98 + enum vfio_device_mig_state mig_state; 99 + /* protect the reset_done flow */ 100 + spinlock_t reset_lock; 101 + struct virtiovf_migration_file *resuming_migf; 102 + struct virtiovf_migration_file *saving_migf; 103 + }; 104 + 105 + void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev); 106 + void virtiovf_open_migration(struct virtiovf_pci_core_device *virtvdev); 107 + void virtiovf_close_migration(struct virtiovf_pci_core_device *virtvdev); 108 + void virtiovf_migration_reset_done(struct pci_dev *pdev); 109 + 110 + #ifdef CONFIG_VIRTIO_VFIO_PCI_ADMIN_LEGACY 111 + int virtiovf_open_legacy_io(struct virtiovf_pci_core_device *virtvdev); 112 + long virtiovf_vfio_pci_core_ioctl(struct vfio_device *core_vdev, 113 + unsigned int cmd, unsigned long arg); 114 + int virtiovf_pci_ioctl_get_region_info(struct vfio_device *core_vdev, 115 + unsigned int cmd, unsigned long arg); 116 + ssize_t virtiovf_pci_core_write(struct vfio_device *core_vdev, 117 + const char __user *buf, size_t count, 118 + loff_t *ppos); 119 + ssize_t 
virtiovf_pci_core_read(struct vfio_device *core_vdev, char __user *buf, 120 + size_t count, loff_t *ppos); 121 + bool virtiovf_support_legacy_io(struct pci_dev *pdev); 122 + int virtiovf_init_legacy_io(struct virtiovf_pci_core_device *virtvdev); 123 + void virtiovf_release_legacy_io(struct virtiovf_pci_core_device *virtvdev); 124 + void virtiovf_legacy_io_reset_done(struct pci_dev *pdev); 125 + #endif 126 + 127 + #endif /* VIRTIO_VFIO_COMMON_H */
+418
drivers/vfio/pci/virtio/legacy_io.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved 4 + */ 5 + 6 + #include <linux/device.h> 7 + #include <linux/module.h> 8 + #include <linux/mutex.h> 9 + #include <linux/pci.h> 10 + #include <linux/pm_runtime.h> 11 + #include <linux/types.h> 12 + #include <linux/uaccess.h> 13 + #include <linux/vfio.h> 14 + #include <linux/vfio_pci_core.h> 15 + #include <linux/virtio_pci.h> 16 + #include <linux/virtio_net.h> 17 + #include <linux/virtio_pci_admin.h> 18 + 19 + #include "common.h" 20 + 21 + static int 22 + virtiovf_issue_legacy_rw_cmd(struct virtiovf_pci_core_device *virtvdev, 23 + loff_t pos, char __user *buf, 24 + size_t count, bool read) 25 + { 26 + bool msix_enabled = 27 + (virtvdev->core_device.irq_type == VFIO_PCI_MSIX_IRQ_INDEX); 28 + struct pci_dev *pdev = virtvdev->core_device.pdev; 29 + u8 *bar0_buf = virtvdev->bar0_virtual_buf; 30 + bool common; 31 + u8 offset; 32 + int ret; 33 + 34 + common = pos < VIRTIO_PCI_CONFIG_OFF(msix_enabled); 35 + /* offset within the relevant configuration area */ 36 + offset = common ? 
pos : pos - VIRTIO_PCI_CONFIG_OFF(msix_enabled); 37 + mutex_lock(&virtvdev->bar_mutex); 38 + if (read) { 39 + if (common) 40 + ret = virtio_pci_admin_legacy_common_io_read(pdev, offset, 41 + count, bar0_buf + pos); 42 + else 43 + ret = virtio_pci_admin_legacy_device_io_read(pdev, offset, 44 + count, bar0_buf + pos); 45 + if (ret) 46 + goto out; 47 + if (copy_to_user(buf, bar0_buf + pos, count)) 48 + ret = -EFAULT; 49 + } else { 50 + if (copy_from_user(bar0_buf + pos, buf, count)) { 51 + ret = -EFAULT; 52 + goto out; 53 + } 54 + 55 + if (common) 56 + ret = virtio_pci_admin_legacy_common_io_write(pdev, offset, 57 + count, bar0_buf + pos); 58 + else 59 + ret = virtio_pci_admin_legacy_device_io_write(pdev, offset, 60 + count, bar0_buf + pos); 61 + } 62 + out: 63 + mutex_unlock(&virtvdev->bar_mutex); 64 + return ret; 65 + } 66 + 67 + static int 68 + virtiovf_pci_bar0_rw(struct virtiovf_pci_core_device *virtvdev, 69 + loff_t pos, char __user *buf, 70 + size_t count, bool read) 71 + { 72 + struct vfio_pci_core_device *core_device = &virtvdev->core_device; 73 + struct pci_dev *pdev = core_device->pdev; 74 + u16 queue_notify; 75 + int ret; 76 + 77 + if (!(le16_to_cpu(virtvdev->pci_cmd) & PCI_COMMAND_IO)) 78 + return -EIO; 79 + 80 + if (pos + count > virtvdev->bar0_virtual_buf_size) 81 + return -EINVAL; 82 + 83 + ret = pm_runtime_resume_and_get(&pdev->dev); 84 + if (ret) { 85 + pci_info_ratelimited(pdev, "runtime resume failed %d\n", ret); 86 + return -EIO; 87 + } 88 + 89 + switch (pos) { 90 + case VIRTIO_PCI_QUEUE_NOTIFY: 91 + if (count != sizeof(queue_notify)) { 92 + ret = -EINVAL; 93 + goto end; 94 + } 95 + if (read) { 96 + ret = vfio_pci_core_ioread16(core_device, true, &queue_notify, 97 + virtvdev->notify_addr); 98 + if (ret) 99 + goto end; 100 + if (copy_to_user(buf, &queue_notify, 101 + sizeof(queue_notify))) { 102 + ret = -EFAULT; 103 + goto end; 104 + } 105 + } else { 106 + if (copy_from_user(&queue_notify, buf, count)) { 107 + ret = -EFAULT; 108 + goto end; 109 + } 
110 + ret = vfio_pci_core_iowrite16(core_device, true, queue_notify, 111 + virtvdev->notify_addr); 112 + } 113 + break; 114 + default: 115 + ret = virtiovf_issue_legacy_rw_cmd(virtvdev, pos, buf, count, 116 + read); 117 + } 118 + 119 + end: 120 + pm_runtime_put(&pdev->dev); 121 + return ret ? ret : count; 122 + } 123 + 124 + static ssize_t virtiovf_pci_read_config(struct vfio_device *core_vdev, 125 + char __user *buf, size_t count, 126 + loff_t *ppos) 127 + { 128 + struct virtiovf_pci_core_device *virtvdev = container_of( 129 + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 130 + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; 131 + size_t register_offset; 132 + loff_t copy_offset; 133 + size_t copy_count; 134 + __le32 val32; 135 + __le16 val16; 136 + u8 val8; 137 + int ret; 138 + 139 + ret = vfio_pci_core_read(core_vdev, buf, count, ppos); 140 + if (ret < 0) 141 + return ret; 142 + 143 + if (vfio_pci_core_range_intersect_range(pos, count, PCI_DEVICE_ID, 144 + sizeof(val16), &copy_offset, 145 + &copy_count, &register_offset)) { 146 + val16 = cpu_to_le16(VIRTIO_TRANS_ID_NET); 147 + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, copy_count)) 148 + return -EFAULT; 149 + } 150 + 151 + if ((le16_to_cpu(virtvdev->pci_cmd) & PCI_COMMAND_IO) && 152 + vfio_pci_core_range_intersect_range(pos, count, PCI_COMMAND, 153 + sizeof(val16), &copy_offset, 154 + &copy_count, &register_offset)) { 155 + if (copy_from_user((void *)&val16 + register_offset, buf + copy_offset, 156 + copy_count)) 157 + return -EFAULT; 158 + val16 |= cpu_to_le16(PCI_COMMAND_IO); 159 + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, 160 + copy_count)) 161 + return -EFAULT; 162 + } 163 + 164 + if (vfio_pci_core_range_intersect_range(pos, count, PCI_REVISION_ID, 165 + sizeof(val8), &copy_offset, 166 + &copy_count, &register_offset)) { 167 + /* Transional needs to have revision 0 */ 168 + val8 = 0; 169 + if (copy_to_user(buf + copy_offset, &val8, 
copy_count)) 170 + return -EFAULT; 171 + } 172 + 173 + if (vfio_pci_core_range_intersect_range(pos, count, PCI_BASE_ADDRESS_0, 174 + sizeof(val32), &copy_offset, 175 + &copy_count, &register_offset)) { 176 + u32 bar_mask = ~(virtvdev->bar0_virtual_buf_size - 1); 177 + u32 pci_base_addr_0 = le32_to_cpu(virtvdev->pci_base_addr_0); 178 + 179 + val32 = cpu_to_le32((pci_base_addr_0 & bar_mask) | PCI_BASE_ADDRESS_SPACE_IO); 180 + if (copy_to_user(buf + copy_offset, (void *)&val32 + register_offset, copy_count)) 181 + return -EFAULT; 182 + } 183 + 184 + if (vfio_pci_core_range_intersect_range(pos, count, PCI_SUBSYSTEM_ID, 185 + sizeof(val16), &copy_offset, 186 + &copy_count, &register_offset)) { 187 + /* 188 + * Transitional devices use the PCI subsystem device id as 189 + * virtio device id, same as legacy driver always did. 190 + */ 191 + val16 = cpu_to_le16(VIRTIO_ID_NET); 192 + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, 193 + copy_count)) 194 + return -EFAULT; 195 + } 196 + 197 + if (vfio_pci_core_range_intersect_range(pos, count, PCI_SUBSYSTEM_VENDOR_ID, 198 + sizeof(val16), &copy_offset, 199 + &copy_count, &register_offset)) { 200 + val16 = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET); 201 + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, 202 + copy_count)) 203 + return -EFAULT; 204 + } 205 + 206 + return count; 207 + } 208 + 209 + ssize_t virtiovf_pci_core_read(struct vfio_device *core_vdev, char __user *buf, 210 + size_t count, loff_t *ppos) 211 + { 212 + struct virtiovf_pci_core_device *virtvdev = container_of( 213 + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 214 + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); 215 + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; 216 + 217 + if (!count) 218 + return 0; 219 + 220 + if (index == VFIO_PCI_CONFIG_REGION_INDEX) 221 + return virtiovf_pci_read_config(core_vdev, buf, count, ppos); 222 + 223 + if (index == VFIO_PCI_BAR0_REGION_INDEX) 224 + return 
virtiovf_pci_bar0_rw(virtvdev, pos, buf, count, true); 225 + 226 + return vfio_pci_core_read(core_vdev, buf, count, ppos); 227 + } 228 + 229 + static ssize_t virtiovf_pci_write_config(struct vfio_device *core_vdev, 230 + const char __user *buf, size_t count, 231 + loff_t *ppos) 232 + { 233 + struct virtiovf_pci_core_device *virtvdev = container_of( 234 + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 235 + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; 236 + size_t register_offset; 237 + loff_t copy_offset; 238 + size_t copy_count; 239 + 240 + if (vfio_pci_core_range_intersect_range(pos, count, PCI_COMMAND, 241 + sizeof(virtvdev->pci_cmd), 242 + &copy_offset, &copy_count, 243 + &register_offset)) { 244 + if (copy_from_user((void *)&virtvdev->pci_cmd + register_offset, 245 + buf + copy_offset, 246 + copy_count)) 247 + return -EFAULT; 248 + } 249 + 250 + if (vfio_pci_core_range_intersect_range(pos, count, PCI_BASE_ADDRESS_0, 251 + sizeof(virtvdev->pci_base_addr_0), 252 + &copy_offset, &copy_count, 253 + &register_offset)) { 254 + if (copy_from_user((void *)&virtvdev->pci_base_addr_0 + register_offset, 255 + buf + copy_offset, 256 + copy_count)) 257 + return -EFAULT; 258 + } 259 + 260 + return vfio_pci_core_write(core_vdev, buf, count, ppos); 261 + } 262 + 263 + ssize_t virtiovf_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, 264 + size_t count, loff_t *ppos) 265 + { 266 + struct virtiovf_pci_core_device *virtvdev = container_of( 267 + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 268 + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); 269 + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; 270 + 271 + if (!count) 272 + return 0; 273 + 274 + if (index == VFIO_PCI_CONFIG_REGION_INDEX) 275 + return virtiovf_pci_write_config(core_vdev, buf, count, ppos); 276 + 277 + if (index == VFIO_PCI_BAR0_REGION_INDEX) 278 + return virtiovf_pci_bar0_rw(virtvdev, pos, (char __user *)buf, count, false); 279 + 280 + return 
vfio_pci_core_write(core_vdev, buf, count, ppos); 281 + } 282 + 283 + int virtiovf_pci_ioctl_get_region_info(struct vfio_device *core_vdev, 284 + unsigned int cmd, unsigned long arg) 285 + { 286 + struct virtiovf_pci_core_device *virtvdev = container_of( 287 + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 288 + unsigned long minsz = offsetofend(struct vfio_region_info, offset); 289 + void __user *uarg = (void __user *)arg; 290 + struct vfio_region_info info = {}; 291 + 292 + if (copy_from_user(&info, uarg, minsz)) 293 + return -EFAULT; 294 + 295 + if (info.argsz < minsz) 296 + return -EINVAL; 297 + 298 + switch (info.index) { 299 + case VFIO_PCI_BAR0_REGION_INDEX: 300 + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); 301 + info.size = virtvdev->bar0_virtual_buf_size; 302 + info.flags = VFIO_REGION_INFO_FLAG_READ | 303 + VFIO_REGION_INFO_FLAG_WRITE; 304 + return copy_to_user(uarg, &info, minsz) ? -EFAULT : 0; 305 + default: 306 + return vfio_pci_core_ioctl(core_vdev, cmd, arg); 307 + } 308 + } 309 + 310 + long virtiovf_vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, 311 + unsigned long arg) 312 + { 313 + switch (cmd) { 314 + case VFIO_DEVICE_GET_REGION_INFO: 315 + return virtiovf_pci_ioctl_get_region_info(core_vdev, cmd, arg); 316 + default: 317 + return vfio_pci_core_ioctl(core_vdev, cmd, arg); 318 + } 319 + } 320 + 321 + static int virtiovf_set_notify_addr(struct virtiovf_pci_core_device *virtvdev) 322 + { 323 + struct vfio_pci_core_device *core_device = &virtvdev->core_device; 324 + int ret; 325 + 326 + /* 327 + * Setup the BAR where the 'notify' exists to be used by vfio as well 328 + * This will let us mmap it only once and use it when needed. 
329 + */ 330 + ret = vfio_pci_core_setup_barmap(core_device, 331 + virtvdev->notify_bar); 332 + if (ret) 333 + return ret; 334 + 335 + virtvdev->notify_addr = core_device->barmap[virtvdev->notify_bar] + 336 + virtvdev->notify_offset; 337 + return 0; 338 + } 339 + 340 + int virtiovf_open_legacy_io(struct virtiovf_pci_core_device *virtvdev) 341 + { 342 + if (!virtvdev->bar0_virtual_buf) 343 + return 0; 344 + 345 + /* 346 + * Upon close_device() the vfio_pci_core_disable() is called 347 + * and will close all the previous mmaps, so it seems that the 348 + * valid life cycle for the 'notify' addr is per open/close. 349 + */ 350 + return virtiovf_set_notify_addr(virtvdev); 351 + } 352 + 353 + static int virtiovf_get_device_config_size(unsigned short device) 354 + { 355 + /* Network card */ 356 + return offsetofend(struct virtio_net_config, status); 357 + } 358 + 359 + static int virtiovf_read_notify_info(struct virtiovf_pci_core_device *virtvdev) 360 + { 361 + u64 offset; 362 + int ret; 363 + u8 bar; 364 + 365 + ret = virtio_pci_admin_legacy_io_notify_info(virtvdev->core_device.pdev, 366 + VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_MEM, 367 + &bar, &offset); 368 + if (ret) 369 + return ret; 370 + 371 + virtvdev->notify_bar = bar; 372 + virtvdev->notify_offset = offset; 373 + return 0; 374 + } 375 + 376 + static bool virtiovf_bar0_exists(struct pci_dev *pdev) 377 + { 378 + struct resource *res = pdev->resource; 379 + 380 + return res->flags; 381 + } 382 + 383 + bool virtiovf_support_legacy_io(struct pci_dev *pdev) 384 + { 385 + return virtio_pci_admin_has_legacy_io(pdev) && !virtiovf_bar0_exists(pdev); 386 + } 387 + 388 + int virtiovf_init_legacy_io(struct virtiovf_pci_core_device *virtvdev) 389 + { 390 + struct pci_dev *pdev = virtvdev->core_device.pdev; 391 + int ret; 392 + 393 + ret = virtiovf_read_notify_info(virtvdev); 394 + if (ret) 395 + return ret; 396 + 397 + virtvdev->bar0_virtual_buf_size = VIRTIO_PCI_CONFIG_OFF(true) + 398 + 
virtiovf_get_device_config_size(pdev->device); 399 + BUILD_BUG_ON(!is_power_of_2(virtvdev->bar0_virtual_buf_size)); 400 + virtvdev->bar0_virtual_buf = kzalloc(virtvdev->bar0_virtual_buf_size, 401 + GFP_KERNEL); 402 + if (!virtvdev->bar0_virtual_buf) 403 + return -ENOMEM; 404 + mutex_init(&virtvdev->bar_mutex); 405 + return 0; 406 + } 407 + 408 + void virtiovf_release_legacy_io(struct virtiovf_pci_core_device *virtvdev) 409 + { 410 + kfree(virtvdev->bar0_virtual_buf); 411 + } 412 + 413 + void virtiovf_legacy_io_reset_done(struct pci_dev *pdev) 414 + { 415 + struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev); 416 + 417 + virtvdev->pci_cmd = 0; 418 + }
+73 -405
drivers/vfio/pci/virtio/main.c
··· 16 16 #include <linux/virtio_net.h> 17 17 #include <linux/virtio_pci_admin.h> 18 18 19 - struct virtiovf_pci_core_device { 20 - struct vfio_pci_core_device core_device; 21 - u8 *bar0_virtual_buf; 22 - /* synchronize access to the virtual buf */ 23 - struct mutex bar_mutex; 24 - void __iomem *notify_addr; 25 - u64 notify_offset; 26 - __le32 pci_base_addr_0; 27 - __le16 pci_cmd; 28 - u8 bar0_virtual_buf_size; 29 - u8 notify_bar; 30 - }; 31 - 32 - static int 33 - virtiovf_issue_legacy_rw_cmd(struct virtiovf_pci_core_device *virtvdev, 34 - loff_t pos, char __user *buf, 35 - size_t count, bool read) 36 - { 37 - bool msix_enabled = 38 - (virtvdev->core_device.irq_type == VFIO_PCI_MSIX_IRQ_INDEX); 39 - struct pci_dev *pdev = virtvdev->core_device.pdev; 40 - u8 *bar0_buf = virtvdev->bar0_virtual_buf; 41 - bool common; 42 - u8 offset; 43 - int ret; 44 - 45 - common = pos < VIRTIO_PCI_CONFIG_OFF(msix_enabled); 46 - /* offset within the relevant configuration area */ 47 - offset = common ? pos : pos - VIRTIO_PCI_CONFIG_OFF(msix_enabled); 48 - mutex_lock(&virtvdev->bar_mutex); 49 - if (read) { 50 - if (common) 51 - ret = virtio_pci_admin_legacy_common_io_read(pdev, offset, 52 - count, bar0_buf + pos); 53 - else 54 - ret = virtio_pci_admin_legacy_device_io_read(pdev, offset, 55 - count, bar0_buf + pos); 56 - if (ret) 57 - goto out; 58 - if (copy_to_user(buf, bar0_buf + pos, count)) 59 - ret = -EFAULT; 60 - } else { 61 - if (copy_from_user(bar0_buf + pos, buf, count)) { 62 - ret = -EFAULT; 63 - goto out; 64 - } 65 - 66 - if (common) 67 - ret = virtio_pci_admin_legacy_common_io_write(pdev, offset, 68 - count, bar0_buf + pos); 69 - else 70 - ret = virtio_pci_admin_legacy_device_io_write(pdev, offset, 71 - count, bar0_buf + pos); 72 - } 73 - out: 74 - mutex_unlock(&virtvdev->bar_mutex); 75 - return ret; 76 - } 77 - 78 - static int 79 - virtiovf_pci_bar0_rw(struct virtiovf_pci_core_device *virtvdev, 80 - loff_t pos, char __user *buf, 81 - size_t count, bool read) 82 - { 83 - 
struct vfio_pci_core_device *core_device = &virtvdev->core_device; 84 - struct pci_dev *pdev = core_device->pdev; 85 - u16 queue_notify; 86 - int ret; 87 - 88 - if (!(le16_to_cpu(virtvdev->pci_cmd) & PCI_COMMAND_IO)) 89 - return -EIO; 90 - 91 - if (pos + count > virtvdev->bar0_virtual_buf_size) 92 - return -EINVAL; 93 - 94 - ret = pm_runtime_resume_and_get(&pdev->dev); 95 - if (ret) { 96 - pci_info_ratelimited(pdev, "runtime resume failed %d\n", ret); 97 - return -EIO; 98 - } 99 - 100 - switch (pos) { 101 - case VIRTIO_PCI_QUEUE_NOTIFY: 102 - if (count != sizeof(queue_notify)) { 103 - ret = -EINVAL; 104 - goto end; 105 - } 106 - if (read) { 107 - ret = vfio_pci_core_ioread16(core_device, true, &queue_notify, 108 - virtvdev->notify_addr); 109 - if (ret) 110 - goto end; 111 - if (copy_to_user(buf, &queue_notify, 112 - sizeof(queue_notify))) { 113 - ret = -EFAULT; 114 - goto end; 115 - } 116 - } else { 117 - if (copy_from_user(&queue_notify, buf, count)) { 118 - ret = -EFAULT; 119 - goto end; 120 - } 121 - ret = vfio_pci_core_iowrite16(core_device, true, queue_notify, 122 - virtvdev->notify_addr); 123 - } 124 - break; 125 - default: 126 - ret = virtiovf_issue_legacy_rw_cmd(virtvdev, pos, buf, count, 127 - read); 128 - } 129 - 130 - end: 131 - pm_runtime_put(&pdev->dev); 132 - return ret ? 
ret : count; 133 - } 134 - 135 - static ssize_t virtiovf_pci_read_config(struct vfio_device *core_vdev, 136 - char __user *buf, size_t count, 137 - loff_t *ppos) 138 - { 139 - struct virtiovf_pci_core_device *virtvdev = container_of( 140 - core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 141 - loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; 142 - size_t register_offset; 143 - loff_t copy_offset; 144 - size_t copy_count; 145 - __le32 val32; 146 - __le16 val16; 147 - u8 val8; 148 - int ret; 149 - 150 - ret = vfio_pci_core_read(core_vdev, buf, count, ppos); 151 - if (ret < 0) 152 - return ret; 153 - 154 - if (vfio_pci_core_range_intersect_range(pos, count, PCI_DEVICE_ID, 155 - sizeof(val16), &copy_offset, 156 - &copy_count, &register_offset)) { 157 - val16 = cpu_to_le16(VIRTIO_TRANS_ID_NET); 158 - if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, copy_count)) 159 - return -EFAULT; 160 - } 161 - 162 - if ((le16_to_cpu(virtvdev->pci_cmd) & PCI_COMMAND_IO) && 163 - vfio_pci_core_range_intersect_range(pos, count, PCI_COMMAND, 164 - sizeof(val16), &copy_offset, 165 - &copy_count, &register_offset)) { 166 - if (copy_from_user((void *)&val16 + register_offset, buf + copy_offset, 167 - copy_count)) 168 - return -EFAULT; 169 - val16 |= cpu_to_le16(PCI_COMMAND_IO); 170 - if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, 171 - copy_count)) 172 - return -EFAULT; 173 - } 174 - 175 - if (vfio_pci_core_range_intersect_range(pos, count, PCI_REVISION_ID, 176 - sizeof(val8), &copy_offset, 177 - &copy_count, &register_offset)) { 178 - /* Transional needs to have revision 0 */ 179 - val8 = 0; 180 - if (copy_to_user(buf + copy_offset, &val8, copy_count)) 181 - return -EFAULT; 182 - } 183 - 184 - if (vfio_pci_core_range_intersect_range(pos, count, PCI_BASE_ADDRESS_0, 185 - sizeof(val32), &copy_offset, 186 - &copy_count, &register_offset)) { 187 - u32 bar_mask = ~(virtvdev->bar0_virtual_buf_size - 1); 188 - u32 pci_base_addr_0 = 
le32_to_cpu(virtvdev->pci_base_addr_0); 189 - 190 - val32 = cpu_to_le32((pci_base_addr_0 & bar_mask) | PCI_BASE_ADDRESS_SPACE_IO); 191 - if (copy_to_user(buf + copy_offset, (void *)&val32 + register_offset, copy_count)) 192 - return -EFAULT; 193 - } 194 - 195 - if (vfio_pci_core_range_intersect_range(pos, count, PCI_SUBSYSTEM_ID, 196 - sizeof(val16), &copy_offset, 197 - &copy_count, &register_offset)) { 198 - /* 199 - * Transitional devices use the PCI subsystem device id as 200 - * virtio device id, same as legacy driver always did. 201 - */ 202 - val16 = cpu_to_le16(VIRTIO_ID_NET); 203 - if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, 204 - copy_count)) 205 - return -EFAULT; 206 - } 207 - 208 - if (vfio_pci_core_range_intersect_range(pos, count, PCI_SUBSYSTEM_VENDOR_ID, 209 - sizeof(val16), &copy_offset, 210 - &copy_count, &register_offset)) { 211 - val16 = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET); 212 - if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, 213 - copy_count)) 214 - return -EFAULT; 215 - } 216 - 217 - return count; 218 - } 219 - 220 - static ssize_t 221 - virtiovf_pci_core_read(struct vfio_device *core_vdev, char __user *buf, 222 - size_t count, loff_t *ppos) 223 - { 224 - struct virtiovf_pci_core_device *virtvdev = container_of( 225 - core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 226 - unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); 227 - loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; 228 - 229 - if (!count) 230 - return 0; 231 - 232 - if (index == VFIO_PCI_CONFIG_REGION_INDEX) 233 - return virtiovf_pci_read_config(core_vdev, buf, count, ppos); 234 - 235 - if (index == VFIO_PCI_BAR0_REGION_INDEX) 236 - return virtiovf_pci_bar0_rw(virtvdev, pos, buf, count, true); 237 - 238 - return vfio_pci_core_read(core_vdev, buf, count, ppos); 239 - } 240 - 241 - static ssize_t virtiovf_pci_write_config(struct vfio_device *core_vdev, 242 - const char __user *buf, size_t count, 243 - loff_t *ppos) 244 - 
{ 245 - struct virtiovf_pci_core_device *virtvdev = container_of( 246 - core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 247 - loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; 248 - size_t register_offset; 249 - loff_t copy_offset; 250 - size_t copy_count; 251 - 252 - if (vfio_pci_core_range_intersect_range(pos, count, PCI_COMMAND, 253 - sizeof(virtvdev->pci_cmd), 254 - &copy_offset, &copy_count, 255 - &register_offset)) { 256 - if (copy_from_user((void *)&virtvdev->pci_cmd + register_offset, 257 - buf + copy_offset, 258 - copy_count)) 259 - return -EFAULT; 260 - } 261 - 262 - if (vfio_pci_core_range_intersect_range(pos, count, PCI_BASE_ADDRESS_0, 263 - sizeof(virtvdev->pci_base_addr_0), 264 - &copy_offset, &copy_count, 265 - &register_offset)) { 266 - if (copy_from_user((void *)&virtvdev->pci_base_addr_0 + register_offset, 267 - buf + copy_offset, 268 - copy_count)) 269 - return -EFAULT; 270 - } 271 - 272 - return vfio_pci_core_write(core_vdev, buf, count, ppos); 273 - } 274 - 275 - static ssize_t 276 - virtiovf_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, 277 - size_t count, loff_t *ppos) 278 - { 279 - struct virtiovf_pci_core_device *virtvdev = container_of( 280 - core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 281 - unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); 282 - loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; 283 - 284 - if (!count) 285 - return 0; 286 - 287 - if (index == VFIO_PCI_CONFIG_REGION_INDEX) 288 - return virtiovf_pci_write_config(core_vdev, buf, count, ppos); 289 - 290 - if (index == VFIO_PCI_BAR0_REGION_INDEX) 291 - return virtiovf_pci_bar0_rw(virtvdev, pos, (char __user *)buf, count, false); 292 - 293 - return vfio_pci_core_write(core_vdev, buf, count, ppos); 294 - } 295 - 296 - static int 297 - virtiovf_pci_ioctl_get_region_info(struct vfio_device *core_vdev, 298 - unsigned int cmd, unsigned long arg) 299 - { 300 - struct virtiovf_pci_core_device *virtvdev = container_of( 301 - core_vdev, 
struct virtiovf_pci_core_device, core_device.vdev); 302 - unsigned long minsz = offsetofend(struct vfio_region_info, offset); 303 - void __user *uarg = (void __user *)arg; 304 - struct vfio_region_info info = {}; 305 - 306 - if (copy_from_user(&info, uarg, minsz)) 307 - return -EFAULT; 308 - 309 - if (info.argsz < minsz) 310 - return -EINVAL; 311 - 312 - switch (info.index) { 313 - case VFIO_PCI_BAR0_REGION_INDEX: 314 - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); 315 - info.size = virtvdev->bar0_virtual_buf_size; 316 - info.flags = VFIO_REGION_INFO_FLAG_READ | 317 - VFIO_REGION_INFO_FLAG_WRITE; 318 - return copy_to_user(uarg, &info, minsz) ? -EFAULT : 0; 319 - default: 320 - return vfio_pci_core_ioctl(core_vdev, cmd, arg); 321 - } 322 - } 323 - 324 - static long 325 - virtiovf_vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, 326 - unsigned long arg) 327 - { 328 - switch (cmd) { 329 - case VFIO_DEVICE_GET_REGION_INFO: 330 - return virtiovf_pci_ioctl_get_region_info(core_vdev, cmd, arg); 331 - default: 332 - return vfio_pci_core_ioctl(core_vdev, cmd, arg); 333 - } 334 - } 335 - 336 - static int 337 - virtiovf_set_notify_addr(struct virtiovf_pci_core_device *virtvdev) 338 - { 339 - struct vfio_pci_core_device *core_device = &virtvdev->core_device; 340 - int ret; 341 - 342 - /* 343 - * Setup the BAR where the 'notify' exists to be used by vfio as well 344 - * This will let us mmap it only once and use it when needed. 
345 - */ 346 - ret = vfio_pci_core_setup_barmap(core_device, 347 - virtvdev->notify_bar); 348 - if (ret) 349 - return ret; 350 - 351 - virtvdev->notify_addr = core_device->barmap[virtvdev->notify_bar] + 352 - virtvdev->notify_offset; 353 - return 0; 354 - } 19 + #include "common.h" 355 20 356 21 static int virtiovf_pci_open_device(struct vfio_device *core_vdev) 357 22 { 358 - struct virtiovf_pci_core_device *virtvdev = container_of( 359 - core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 23 + struct virtiovf_pci_core_device *virtvdev = container_of(core_vdev, 24 + struct virtiovf_pci_core_device, core_device.vdev); 360 25 struct vfio_pci_core_device *vdev = &virtvdev->core_device; 361 26 int ret; 362 27 ··· 29 364 if (ret) 30 365 return ret; 31 366 32 - if (virtvdev->bar0_virtual_buf) { 33 - /* 34 - * Upon close_device() the vfio_pci_core_disable() is called 35 - * and will close all the previous mmaps, so it seems that the 36 - * valid life cycle for the 'notify' addr is per open/close. 
37 - */ 38 - ret = virtiovf_set_notify_addr(virtvdev); 39 - if (ret) { 40 - vfio_pci_core_disable(vdev); 41 - return ret; 42 - } 367 + #ifdef CONFIG_VIRTIO_VFIO_PCI_ADMIN_LEGACY 368 + ret = virtiovf_open_legacy_io(virtvdev); 369 + if (ret) { 370 + vfio_pci_core_disable(vdev); 371 + return ret; 43 372 } 373 + #endif 44 374 375 + virtiovf_open_migration(virtvdev); 45 376 vfio_pci_core_finish_enable(vdev); 46 377 return 0; 47 378 } 48 379 49 - static int virtiovf_get_device_config_size(unsigned short device) 380 + static void virtiovf_pci_close_device(struct vfio_device *core_vdev) 50 381 { 51 - /* Network card */ 52 - return offsetofend(struct virtio_net_config, status); 382 + struct virtiovf_pci_core_device *virtvdev = container_of(core_vdev, 383 + struct virtiovf_pci_core_device, core_device.vdev); 384 + 385 + virtiovf_close_migration(virtvdev); 386 + vfio_pci_core_close_device(core_vdev); 53 387 } 54 388 55 - static int virtiovf_read_notify_info(struct virtiovf_pci_core_device *virtvdev) 56 - { 57 - u64 offset; 58 - int ret; 59 - u8 bar; 60 - 61 - ret = virtio_pci_admin_legacy_io_notify_info(virtvdev->core_device.pdev, 62 - VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_MEM, 63 - &bar, &offset); 64 - if (ret) 65 - return ret; 66 - 67 - virtvdev->notify_bar = bar; 68 - virtvdev->notify_offset = offset; 69 - return 0; 70 - } 71 - 389 + #ifdef CONFIG_VIRTIO_VFIO_PCI_ADMIN_LEGACY 72 390 static int virtiovf_pci_init_device(struct vfio_device *core_vdev) 73 391 { 74 - struct virtiovf_pci_core_device *virtvdev = container_of( 75 - core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 76 - struct pci_dev *pdev; 392 + struct virtiovf_pci_core_device *virtvdev = container_of(core_vdev, 393 + struct virtiovf_pci_core_device, core_device.vdev); 77 394 int ret; 78 395 79 396 ret = vfio_pci_core_init_dev(core_vdev); 80 397 if (ret) 81 398 return ret; 82 399 83 - pdev = virtvdev->core_device.pdev; 84 - ret = virtiovf_read_notify_info(virtvdev); 85 - if (ret) 86 - return ret; 
87 - 88 - virtvdev->bar0_virtual_buf_size = VIRTIO_PCI_CONFIG_OFF(true) + 89 - virtiovf_get_device_config_size(pdev->device); 90 - BUILD_BUG_ON(!is_power_of_2(virtvdev->bar0_virtual_buf_size)); 91 - virtvdev->bar0_virtual_buf = kzalloc(virtvdev->bar0_virtual_buf_size, 92 - GFP_KERNEL); 93 - if (!virtvdev->bar0_virtual_buf) 94 - return -ENOMEM; 95 - mutex_init(&virtvdev->bar_mutex); 96 - return 0; 400 + /* 401 + * The vfio_device_ops.init() callback is set to virtiovf_pci_init_device() 402 + * only when legacy I/O is supported. Now, let's initialize it. 403 + */ 404 + return virtiovf_init_legacy_io(virtvdev); 97 405 } 406 + #endif 98 407 99 408 static void virtiovf_pci_core_release_dev(struct vfio_device *core_vdev) 100 409 { 101 - struct virtiovf_pci_core_device *virtvdev = container_of( 102 - core_vdev, struct virtiovf_pci_core_device, core_device.vdev); 410 + #ifdef CONFIG_VIRTIO_VFIO_PCI_ADMIN_LEGACY 411 + struct virtiovf_pci_core_device *virtvdev = container_of(core_vdev, 412 + struct virtiovf_pci_core_device, core_device.vdev); 103 413 104 - kfree(virtvdev->bar0_virtual_buf); 414 + virtiovf_release_legacy_io(virtvdev); 415 + #endif 105 416 vfio_pci_core_release_dev(core_vdev); 106 417 } 107 418 108 - static const struct vfio_device_ops virtiovf_vfio_pci_tran_ops = { 109 - .name = "virtio-vfio-pci-trans", 419 + static const struct vfio_device_ops virtiovf_vfio_pci_lm_ops = { 420 + .name = "virtio-vfio-pci-lm", 421 + .init = vfio_pci_core_init_dev, 422 + .release = virtiovf_pci_core_release_dev, 423 + .open_device = virtiovf_pci_open_device, 424 + .close_device = virtiovf_pci_close_device, 425 + .ioctl = vfio_pci_core_ioctl, 426 + .device_feature = vfio_pci_core_ioctl_feature, 427 + .read = vfio_pci_core_read, 428 + .write = vfio_pci_core_write, 429 + .mmap = vfio_pci_core_mmap, 430 + .request = vfio_pci_core_request, 431 + .match = vfio_pci_core_match, 432 + .bind_iommufd = vfio_iommufd_physical_bind, 433 + .unbind_iommufd = vfio_iommufd_physical_unbind, 434 + 
.attach_ioas = vfio_iommufd_physical_attach_ioas, 435 + .detach_ioas = vfio_iommufd_physical_detach_ioas, 436 + }; 437 + 438 + #ifdef CONFIG_VIRTIO_VFIO_PCI_ADMIN_LEGACY 439 + static const struct vfio_device_ops virtiovf_vfio_pci_tran_lm_ops = { 440 + .name = "virtio-vfio-pci-trans-lm", 110 441 .init = virtiovf_pci_init_device, 111 442 .release = virtiovf_pci_core_release_dev, 112 443 .open_device = virtiovf_pci_open_device, 113 - .close_device = vfio_pci_core_close_device, 444 + .close_device = virtiovf_pci_close_device, 114 445 .ioctl = virtiovf_vfio_pci_core_ioctl, 115 446 .device_feature = vfio_pci_core_ioctl_feature, 116 447 .read = virtiovf_pci_core_read, ··· 119 458 .attach_ioas = vfio_iommufd_physical_attach_ioas, 120 459 .detach_ioas = vfio_iommufd_physical_detach_ioas, 121 460 }; 461 + #endif 122 462 123 463 static const struct vfio_device_ops virtiovf_vfio_pci_ops = { 124 464 .name = "virtio-vfio-pci", ··· 140 478 .detach_ioas = vfio_iommufd_physical_detach_ioas, 141 479 }; 142 480 143 - static bool virtiovf_bar0_exists(struct pci_dev *pdev) 144 - { 145 - struct resource *res = pdev->resource; 146 - 147 - return res->flags; 148 - } 149 - 150 481 static int virtiovf_pci_probe(struct pci_dev *pdev, 151 482 const struct pci_device_id *id) 152 483 { 153 484 const struct vfio_device_ops *ops = &virtiovf_vfio_pci_ops; 154 485 struct virtiovf_pci_core_device *virtvdev; 486 + bool sup_legacy_io = false; 487 + bool sup_lm = false; 155 488 int ret; 156 489 157 - if (pdev->is_virtfn && virtio_pci_admin_has_legacy_io(pdev) && 158 - !virtiovf_bar0_exists(pdev)) 159 - ops = &virtiovf_vfio_pci_tran_ops; 490 + if (pdev->is_virtfn) { 491 + #ifdef CONFIG_VIRTIO_VFIO_PCI_ADMIN_LEGACY 492 + sup_legacy_io = virtiovf_support_legacy_io(pdev); 493 + if (sup_legacy_io) 494 + ops = &virtiovf_vfio_pci_tran_lm_ops; 495 + #endif 496 + sup_lm = virtio_pci_admin_has_dev_parts(pdev); 497 + if (sup_lm && !sup_legacy_io) 498 + ops = &virtiovf_vfio_pci_lm_ops; 499 + } 160 500 161 501 
virtvdev = vfio_alloc_device(virtiovf_pci_core_device, core_device.vdev, 162 502 &pdev->dev, ops); 163 503 if (IS_ERR(virtvdev)) 164 504 return PTR_ERR(virtvdev); 505 + 506 + if (sup_lm) 507 + virtiovf_set_migratable(virtvdev); 165 508 166 509 dev_set_drvdata(&pdev->dev, &virtvdev->core_device); 167 510 ret = vfio_pci_core_register_device(&virtvdev->core_device); ··· 196 529 197 530 static void virtiovf_pci_aer_reset_done(struct pci_dev *pdev) 198 531 { 199 - struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev); 200 - 201 - virtvdev->pci_cmd = 0; 532 + #ifdef CONFIG_VIRTIO_VFIO_PCI_ADMIN_LEGACY 533 + virtiovf_legacy_io_reset_done(pdev); 534 + #endif 535 + virtiovf_migration_reset_done(pdev); 202 536 } 203 537 204 538 static const struct pci_error_handlers virtiovf_err_handlers = {
+1337
drivers/vfio/pci/virtio/migrate.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved 4 + */ 5 + 6 + #include <linux/device.h> 7 + #include <linux/module.h> 8 + #include <linux/mutex.h> 9 + #include <linux/pci.h> 10 + #include <linux/pm_runtime.h> 11 + #include <linux/types.h> 12 + #include <linux/uaccess.h> 13 + #include <linux/vfio.h> 14 + #include <linux/vfio_pci_core.h> 15 + #include <linux/virtio_pci.h> 16 + #include <linux/virtio_net.h> 17 + #include <linux/virtio_pci_admin.h> 18 + #include <linux/anon_inodes.h> 19 + 20 + #include "common.h" 21 + 22 + /* Device specification max parts size */ 23 + #define MAX_LOAD_SIZE (BIT_ULL(BITS_PER_TYPE \ 24 + (((struct virtio_admin_cmd_dev_parts_metadata_result *)0)->parts_size.size)) - 1) 25 + 26 + /* Initial target buffer size */ 27 + #define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M 28 + 29 + static int 30 + virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf, 31 + u32 ctx_size); 32 + 33 + static struct page * 34 + virtiovf_get_migration_page(struct virtiovf_data_buffer *buf, 35 + unsigned long offset) 36 + { 37 + unsigned long cur_offset = 0; 38 + struct scatterlist *sg; 39 + unsigned int i; 40 + 41 + /* All accesses are sequential */ 42 + if (offset < buf->last_offset || !buf->last_offset_sg) { 43 + buf->last_offset = 0; 44 + buf->last_offset_sg = buf->table.sgt.sgl; 45 + buf->sg_last_entry = 0; 46 + } 47 + 48 + cur_offset = buf->last_offset; 49 + 50 + for_each_sg(buf->last_offset_sg, sg, 51 + buf->table.sgt.orig_nents - buf->sg_last_entry, i) { 52 + if (offset < sg->length + cur_offset) { 53 + buf->last_offset_sg = sg; 54 + buf->sg_last_entry += i; 55 + buf->last_offset = cur_offset; 56 + return nth_page(sg_page(sg), 57 + (offset - cur_offset) / PAGE_SIZE); 58 + } 59 + cur_offset += sg->length; 60 + } 61 + return NULL; 62 + } 63 + 64 + static int virtiovf_add_migration_pages(struct virtiovf_data_buffer *buf, 65 + unsigned int npages) 66 + { 67 + 
unsigned int to_alloc = npages; 68 + struct page **page_list; 69 + unsigned long filled; 70 + unsigned int to_fill; 71 + int ret; 72 + int i; 73 + 74 + to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); 75 + page_list = kvcalloc(to_fill, sizeof(*page_list), GFP_KERNEL_ACCOUNT); 76 + if (!page_list) 77 + return -ENOMEM; 78 + 79 + do { 80 + filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, 81 + page_list); 82 + if (!filled) { 83 + ret = -ENOMEM; 84 + goto err; 85 + } 86 + to_alloc -= filled; 87 + ret = sg_alloc_append_table_from_pages(&buf->table, page_list, 88 + filled, 0, filled << PAGE_SHIFT, UINT_MAX, 89 + SG_MAX_SINGLE_ALLOC, GFP_KERNEL_ACCOUNT); 90 + 91 + if (ret) 92 + goto err_append; 93 + buf->allocated_length += filled * PAGE_SIZE; 94 + /* clean input for another bulk allocation */ 95 + memset(page_list, 0, filled * sizeof(*page_list)); 96 + to_fill = min_t(unsigned int, to_alloc, 97 + PAGE_SIZE / sizeof(*page_list)); 98 + } while (to_alloc > 0); 99 + 100 + kvfree(page_list); 101 + return 0; 102 + 103 + err_append: 104 + for (i = filled - 1; i >= 0; i--) 105 + __free_page(page_list[i]); 106 + err: 107 + kvfree(page_list); 108 + return ret; 109 + } 110 + 111 + static void virtiovf_free_data_buffer(struct virtiovf_data_buffer *buf) 112 + { 113 + struct sg_page_iter sg_iter; 114 + 115 + /* Undo alloc_pages_bulk_array() */ 116 + for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0) 117 + __free_page(sg_page_iter_page(&sg_iter)); 118 + sg_free_append_table(&buf->table); 119 + kfree(buf); 120 + } 121 + 122 + static struct virtiovf_data_buffer * 123 + virtiovf_alloc_data_buffer(struct virtiovf_migration_file *migf, size_t length) 124 + { 125 + struct virtiovf_data_buffer *buf; 126 + int ret; 127 + 128 + buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); 129 + if (!buf) 130 + return ERR_PTR(-ENOMEM); 131 + 132 + ret = virtiovf_add_migration_pages(buf, 133 + DIV_ROUND_UP_ULL(length, PAGE_SIZE)); 134 + if (ret) 135 + goto end; 136 + 137 + 
buf->migf = migf; 138 + return buf; 139 + end: 140 + virtiovf_free_data_buffer(buf); 141 + return ERR_PTR(ret); 142 + } 143 + 144 + static void virtiovf_put_data_buffer(struct virtiovf_data_buffer *buf) 145 + { 146 + spin_lock_irq(&buf->migf->list_lock); 147 + list_add_tail(&buf->buf_elm, &buf->migf->avail_list); 148 + spin_unlock_irq(&buf->migf->list_lock); 149 + } 150 + 151 + static int 152 + virtiovf_pci_alloc_obj_id(struct virtiovf_pci_core_device *virtvdev, u8 type, 153 + u32 *obj_id) 154 + { 155 + return virtio_pci_admin_obj_create(virtvdev->core_device.pdev, 156 + VIRTIO_RESOURCE_OBJ_DEV_PARTS, type, obj_id); 157 + } 158 + 159 + static void 160 + virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id) 161 + { 162 + virtio_pci_admin_obj_destroy(virtvdev->core_device.pdev, 163 + VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id); 164 + } 165 + 166 + static struct virtiovf_data_buffer * 167 + virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length) 168 + { 169 + struct virtiovf_data_buffer *buf, *temp_buf; 170 + struct list_head free_list; 171 + 172 + INIT_LIST_HEAD(&free_list); 173 + 174 + spin_lock_irq(&migf->list_lock); 175 + list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) { 176 + list_del_init(&buf->buf_elm); 177 + if (buf->allocated_length >= length) { 178 + spin_unlock_irq(&migf->list_lock); 179 + goto found; 180 + } 181 + /* 182 + * Prevent holding redundant buffers. Put in a free 183 + * list and call at the end not under the spin lock 184 + * (&migf->list_lock) to minimize its scope usage. 
185 + */ 186 + list_add(&buf->buf_elm, &free_list); 187 + } 188 + spin_unlock_irq(&migf->list_lock); 189 + buf = virtiovf_alloc_data_buffer(migf, length); 190 + 191 + found: 192 + while ((temp_buf = list_first_entry_or_null(&free_list, 193 + struct virtiovf_data_buffer, buf_elm))) { 194 + list_del(&temp_buf->buf_elm); 195 + virtiovf_free_data_buffer(temp_buf); 196 + } 197 + 198 + return buf; 199 + } 200 + 201 + static void virtiovf_clean_migf_resources(struct virtiovf_migration_file *migf) 202 + { 203 + struct virtiovf_data_buffer *entry; 204 + 205 + if (migf->buf) { 206 + virtiovf_free_data_buffer(migf->buf); 207 + migf->buf = NULL; 208 + } 209 + 210 + if (migf->buf_header) { 211 + virtiovf_free_data_buffer(migf->buf_header); 212 + migf->buf_header = NULL; 213 + } 214 + 215 + list_splice(&migf->avail_list, &migf->buf_list); 216 + 217 + while ((entry = list_first_entry_or_null(&migf->buf_list, 218 + struct virtiovf_data_buffer, buf_elm))) { 219 + list_del(&entry->buf_elm); 220 + virtiovf_free_data_buffer(entry); 221 + } 222 + 223 + if (migf->has_obj_id) 224 + virtiovf_pci_free_obj_id(migf->virtvdev, migf->obj_id); 225 + } 226 + 227 + static void virtiovf_disable_fd(struct virtiovf_migration_file *migf) 228 + { 229 + mutex_lock(&migf->lock); 230 + migf->state = VIRTIOVF_MIGF_STATE_ERROR; 231 + migf->filp->f_pos = 0; 232 + mutex_unlock(&migf->lock); 233 + } 234 + 235 + static void virtiovf_disable_fds(struct virtiovf_pci_core_device *virtvdev) 236 + { 237 + if (virtvdev->resuming_migf) { 238 + virtiovf_disable_fd(virtvdev->resuming_migf); 239 + virtiovf_clean_migf_resources(virtvdev->resuming_migf); 240 + fput(virtvdev->resuming_migf->filp); 241 + virtvdev->resuming_migf = NULL; 242 + } 243 + if (virtvdev->saving_migf) { 244 + virtiovf_disable_fd(virtvdev->saving_migf); 245 + virtiovf_clean_migf_resources(virtvdev->saving_migf); 246 + fput(virtvdev->saving_migf->filp); 247 + virtvdev->saving_migf = NULL; 248 + } 249 + } 250 + 251 + /* 252 + * This function is called 
in all state_mutex unlock cases to 253 + * handle a 'deferred_reset' if exists. 254 + */ 255 + static void virtiovf_state_mutex_unlock(struct virtiovf_pci_core_device *virtvdev) 256 + { 257 + again: 258 + spin_lock(&virtvdev->reset_lock); 259 + if (virtvdev->deferred_reset) { 260 + virtvdev->deferred_reset = false; 261 + spin_unlock(&virtvdev->reset_lock); 262 + virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; 263 + virtiovf_disable_fds(virtvdev); 264 + goto again; 265 + } 266 + mutex_unlock(&virtvdev->state_mutex); 267 + spin_unlock(&virtvdev->reset_lock); 268 + } 269 + 270 + void virtiovf_migration_reset_done(struct pci_dev *pdev) 271 + { 272 + struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev); 273 + 274 + if (!virtvdev->migrate_cap) 275 + return; 276 + 277 + /* 278 + * As the higher VFIO layers are holding locks across reset and using 279 + * those same locks with the mm_lock we need to prevent ABBA deadlock 280 + * with the state_mutex and mm_lock. 281 + * In case the state_mutex was taken already we defer the cleanup work 282 + * to the unlock flow of the other running context. 
283 + */ 284 + spin_lock(&virtvdev->reset_lock); 285 + virtvdev->deferred_reset = true; 286 + if (!mutex_trylock(&virtvdev->state_mutex)) { 287 + spin_unlock(&virtvdev->reset_lock); 288 + return; 289 + } 290 + spin_unlock(&virtvdev->reset_lock); 291 + virtiovf_state_mutex_unlock(virtvdev); 292 + } 293 + 294 + static int virtiovf_release_file(struct inode *inode, struct file *filp) 295 + { 296 + struct virtiovf_migration_file *migf = filp->private_data; 297 + 298 + virtiovf_disable_fd(migf); 299 + mutex_destroy(&migf->lock); 300 + kfree(migf); 301 + return 0; 302 + } 303 + 304 + static struct virtiovf_data_buffer * 305 + virtiovf_get_data_buff_from_pos(struct virtiovf_migration_file *migf, 306 + loff_t pos, bool *end_of_data) 307 + { 308 + struct virtiovf_data_buffer *buf; 309 + bool found = false; 310 + 311 + *end_of_data = false; 312 + spin_lock_irq(&migf->list_lock); 313 + if (list_empty(&migf->buf_list)) { 314 + *end_of_data = true; 315 + goto end; 316 + } 317 + 318 + buf = list_first_entry(&migf->buf_list, struct virtiovf_data_buffer, 319 + buf_elm); 320 + if (pos >= buf->start_pos && 321 + pos < buf->start_pos + buf->length) { 322 + found = true; 323 + goto end; 324 + } 325 + 326 + /* 327 + * As we use a stream based FD we may expect having the data always 328 + * on first chunk 329 + */ 330 + migf->state = VIRTIOVF_MIGF_STATE_ERROR; 331 + 332 + end: 333 + spin_unlock_irq(&migf->list_lock); 334 + return found ? 
buf : NULL; 335 + } 336 + 337 + static ssize_t virtiovf_buf_read(struct virtiovf_data_buffer *vhca_buf, 338 + char __user **buf, size_t *len, loff_t *pos) 339 + { 340 + unsigned long offset; 341 + ssize_t done = 0; 342 + size_t copy_len; 343 + 344 + copy_len = min_t(size_t, 345 + vhca_buf->start_pos + vhca_buf->length - *pos, *len); 346 + while (copy_len) { 347 + size_t page_offset; 348 + struct page *page; 349 + size_t page_len; 350 + u8 *from_buff; 351 + int ret; 352 + 353 + offset = *pos - vhca_buf->start_pos; 354 + page_offset = offset % PAGE_SIZE; 355 + offset -= page_offset; 356 + page = virtiovf_get_migration_page(vhca_buf, offset); 357 + if (!page) 358 + return -EINVAL; 359 + page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset); 360 + from_buff = kmap_local_page(page); 361 + ret = copy_to_user(*buf, from_buff + page_offset, page_len); 362 + kunmap_local(from_buff); 363 + if (ret) 364 + return -EFAULT; 365 + *pos += page_len; 366 + *len -= page_len; 367 + *buf += page_len; 368 + done += page_len; 369 + copy_len -= page_len; 370 + } 371 + 372 + if (*pos >= vhca_buf->start_pos + vhca_buf->length) { 373 + spin_lock_irq(&vhca_buf->migf->list_lock); 374 + list_del_init(&vhca_buf->buf_elm); 375 + list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list); 376 + spin_unlock_irq(&vhca_buf->migf->list_lock); 377 + } 378 + 379 + return done; 380 + } 381 + 382 + static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t len, 383 + loff_t *pos) 384 + { 385 + struct virtiovf_migration_file *migf = filp->private_data; 386 + struct virtiovf_data_buffer *vhca_buf; 387 + bool first_loop_call = true; 388 + bool end_of_data; 389 + ssize_t done = 0; 390 + 391 + if (pos) 392 + return -ESPIPE; 393 + pos = &filp->f_pos; 394 + 395 + mutex_lock(&migf->lock); 396 + if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) { 397 + done = -ENODEV; 398 + goto out_unlock; 399 + } 400 + 401 + while (len) { 402 + ssize_t count; 403 + 404 + vhca_buf = 
virtiovf_get_data_buff_from_pos(migf, *pos, &end_of_data); 405 + if (first_loop_call) { 406 + first_loop_call = false; 407 + /* Temporary end of file as part of PRE_COPY */ 408 + if (end_of_data && migf->state == VIRTIOVF_MIGF_STATE_PRECOPY) { 409 + done = -ENOMSG; 410 + goto out_unlock; 411 + } 412 + if (end_of_data && migf->state != VIRTIOVF_MIGF_STATE_COMPLETE) { 413 + done = -EINVAL; 414 + goto out_unlock; 415 + } 416 + } 417 + 418 + if (end_of_data) 419 + goto out_unlock; 420 + 421 + if (!vhca_buf) { 422 + done = -EINVAL; 423 + goto out_unlock; 424 + } 425 + 426 + count = virtiovf_buf_read(vhca_buf, &buf, &len, pos); 427 + if (count < 0) { 428 + done = count; 429 + goto out_unlock; 430 + } 431 + done += count; 432 + } 433 + 434 + out_unlock: 435 + mutex_unlock(&migf->lock); 436 + return done; 437 + } 438 + 439 + static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd, 440 + unsigned long arg) 441 + { 442 + struct virtiovf_migration_file *migf = filp->private_data; 443 + struct virtiovf_pci_core_device *virtvdev = migf->virtvdev; 444 + struct vfio_precopy_info info = {}; 445 + loff_t *pos = &filp->f_pos; 446 + bool end_of_data = false; 447 + unsigned long minsz; 448 + u32 ctx_size = 0; 449 + int ret; 450 + 451 + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) 452 + return -ENOTTY; 453 + 454 + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); 455 + if (copy_from_user(&info, (void __user *)arg, minsz)) 456 + return -EFAULT; 457 + 458 + if (info.argsz < minsz) 459 + return -EINVAL; 460 + 461 + mutex_lock(&virtvdev->state_mutex); 462 + if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY && 463 + virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) { 464 + ret = -EINVAL; 465 + goto err_state_unlock; 466 + } 467 + 468 + /* 469 + * The virtio specification does not include a PRE_COPY concept. 470 + * Since we can expect the data to remain the same for a certain period, 471 + * we use a rate limiter mechanism before making a call to the device. 
472 + */ 473 + if (__ratelimit(&migf->pre_copy_rl_state)) { 474 + 475 + ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev, 476 + VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id, 477 + VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE, 478 + &ctx_size); 479 + if (ret) 480 + goto err_state_unlock; 481 + } 482 + 483 + mutex_lock(&migf->lock); 484 + if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) { 485 + ret = -ENODEV; 486 + goto err_migf_unlock; 487 + } 488 + 489 + if (migf->pre_copy_initial_bytes > *pos) { 490 + info.initial_bytes = migf->pre_copy_initial_bytes - *pos; 491 + } else { 492 + info.dirty_bytes = migf->max_pos - *pos; 493 + if (!info.dirty_bytes) 494 + end_of_data = true; 495 + info.dirty_bytes += ctx_size; 496 + } 497 + 498 + if (!end_of_data || !ctx_size) { 499 + mutex_unlock(&migf->lock); 500 + goto done; 501 + } 502 + 503 + mutex_unlock(&migf->lock); 504 + /* 505 + * We finished transferring the current state and the device has a 506 + * dirty state, read a new state. 507 + */ 508 + ret = virtiovf_read_device_context_chunk(migf, ctx_size); 509 + if (ret) 510 + /* 511 + * The machine is running, and context size could be grow, so no reason to mark 512 + * the device state as VIRTIOVF_MIGF_STATE_ERROR. 
513 + */ 514 + goto err_state_unlock; 515 + 516 + done: 517 + virtiovf_state_mutex_unlock(virtvdev); 518 + if (copy_to_user((void __user *)arg, &info, minsz)) 519 + return -EFAULT; 520 + return 0; 521 + 522 + err_migf_unlock: 523 + mutex_unlock(&migf->lock); 524 + err_state_unlock: 525 + virtiovf_state_mutex_unlock(virtvdev); 526 + return ret; 527 + } 528 + 529 + static const struct file_operations virtiovf_save_fops = { 530 + .owner = THIS_MODULE, 531 + .read = virtiovf_save_read, 532 + .unlocked_ioctl = virtiovf_precopy_ioctl, 533 + .compat_ioctl = compat_ptr_ioctl, 534 + .release = virtiovf_release_file, 535 + }; 536 + 537 + static int 538 + virtiovf_add_buf_header(struct virtiovf_data_buffer *header_buf, 539 + u32 data_size) 540 + { 541 + struct virtiovf_migration_file *migf = header_buf->migf; 542 + struct virtiovf_migration_header header = {}; 543 + struct page *page; 544 + u8 *to_buff; 545 + 546 + header.record_size = cpu_to_le64(data_size); 547 + header.flags = cpu_to_le32(VIRTIOVF_MIGF_HEADER_FLAGS_TAG_MANDATORY); 548 + header.tag = cpu_to_le32(VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA); 549 + page = virtiovf_get_migration_page(header_buf, 0); 550 + if (!page) 551 + return -EINVAL; 552 + to_buff = kmap_local_page(page); 553 + memcpy(to_buff, &header, sizeof(header)); 554 + kunmap_local(to_buff); 555 + header_buf->length = sizeof(header); 556 + header_buf->start_pos = header_buf->migf->max_pos; 557 + migf->max_pos += header_buf->length; 558 + spin_lock_irq(&migf->list_lock); 559 + list_add_tail(&header_buf->buf_elm, &migf->buf_list); 560 + spin_unlock_irq(&migf->list_lock); 561 + return 0; 562 + } 563 + 564 + static int 565 + virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf, 566 + u32 ctx_size) 567 + { 568 + struct virtiovf_data_buffer *header_buf; 569 + struct virtiovf_data_buffer *buf; 570 + bool unmark_end = false; 571 + struct scatterlist *sg; 572 + unsigned int i; 573 + u32 res_size; 574 + int nent; 575 + int ret; 576 + 577 + buf = 
virtiovf_get_data_buffer(migf, ctx_size); 578 + if (IS_ERR(buf)) 579 + return PTR_ERR(buf); 580 + 581 + /* Find the total count of SG entries which satisfies the size */ 582 + nent = sg_nents_for_len(buf->table.sgt.sgl, ctx_size); 583 + if (nent <= 0) { 584 + ret = -EINVAL; 585 + goto out; 586 + } 587 + 588 + /* 589 + * Iterate to that SG entry and mark it as last (if it's not already) 590 + * to let underlay layers iterate only till that entry. 591 + */ 592 + for_each_sg(buf->table.sgt.sgl, sg, nent - 1, i) 593 + ; 594 + 595 + if (!sg_is_last(sg)) { 596 + unmark_end = true; 597 + sg_mark_end(sg); 598 + } 599 + 600 + ret = virtio_pci_admin_dev_parts_get(migf->virtvdev->core_device.pdev, 601 + VIRTIO_RESOURCE_OBJ_DEV_PARTS, 602 + migf->obj_id, 603 + VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL, 604 + buf->table.sgt.sgl, &res_size); 605 + /* Restore the original SG mark end */ 606 + if (unmark_end) 607 + sg_unmark_end(sg); 608 + if (ret) 609 + goto out; 610 + 611 + buf->length = res_size; 612 + header_buf = virtiovf_get_data_buffer(migf, 613 + sizeof(struct virtiovf_migration_header)); 614 + if (IS_ERR(header_buf)) { 615 + ret = PTR_ERR(header_buf); 616 + goto out; 617 + } 618 + 619 + ret = virtiovf_add_buf_header(header_buf, res_size); 620 + if (ret) 621 + goto out_header; 622 + 623 + buf->start_pos = buf->migf->max_pos; 624 + migf->max_pos += buf->length; 625 + spin_lock(&migf->list_lock); 626 + list_add_tail(&buf->buf_elm, &migf->buf_list); 627 + spin_unlock_irq(&migf->list_lock); 628 + return 0; 629 + 630 + out_header: 631 + virtiovf_put_data_buffer(header_buf); 632 + out: 633 + virtiovf_put_data_buffer(buf); 634 + return ret; 635 + } 636 + 637 + static int 638 + virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device *virtvdev) 639 + { 640 + struct virtiovf_migration_file *migf = virtvdev->saving_migf; 641 + u32 ctx_size; 642 + int ret; 643 + 644 + if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) 645 + return -ENODEV; 646 + 647 + ret = 
virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev, 648 + VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id, 649 + VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE, 650 + &ctx_size); 651 + if (ret) 652 + goto err; 653 + 654 + if (!ctx_size) { 655 + ret = -EINVAL; 656 + goto err; 657 + } 658 + 659 + ret = virtiovf_read_device_context_chunk(migf, ctx_size); 660 + if (ret) 661 + goto err; 662 + 663 + migf->state = VIRTIOVF_MIGF_STATE_COMPLETE; 664 + return 0; 665 + 666 + err: 667 + migf->state = VIRTIOVF_MIGF_STATE_ERROR; 668 + return ret; 669 + } 670 + 671 + static struct virtiovf_migration_file * 672 + virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev, 673 + bool pre_copy) 674 + { 675 + struct virtiovf_migration_file *migf; 676 + u32 ctx_size; 677 + u32 obj_id; 678 + int ret; 679 + 680 + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); 681 + if (!migf) 682 + return ERR_PTR(-ENOMEM); 683 + 684 + migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_save_fops, migf, 685 + O_RDONLY); 686 + if (IS_ERR(migf->filp)) { 687 + ret = PTR_ERR(migf->filp); 688 + kfree(migf); 689 + return ERR_PTR(ret); 690 + } 691 + 692 + stream_open(migf->filp->f_inode, migf->filp); 693 + mutex_init(&migf->lock); 694 + INIT_LIST_HEAD(&migf->buf_list); 695 + INIT_LIST_HEAD(&migf->avail_list); 696 + spin_lock_init(&migf->list_lock); 697 + migf->virtvdev = virtvdev; 698 + 699 + lockdep_assert_held(&virtvdev->state_mutex); 700 + ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET, 701 + &obj_id); 702 + if (ret) 703 + goto out; 704 + 705 + migf->obj_id = obj_id; 706 + /* Mark as having a valid obj id which can be even 0 */ 707 + migf->has_obj_id = true; 708 + ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev, 709 + VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id, 710 + VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE, 711 + &ctx_size); 712 + if (ret) 713 + goto out_clean; 714 + 715 + if (!ctx_size) { 716 + ret = -EINVAL; 717 + 
goto out_clean; 718 + } 719 + 720 + ret = virtiovf_read_device_context_chunk(migf, ctx_size); 721 + if (ret) 722 + goto out_clean; 723 + 724 + if (pre_copy) { 725 + migf->pre_copy_initial_bytes = migf->max_pos; 726 + /* Arbitrarily set the pre-copy rate limit to 1-second intervals */ 727 + ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1); 728 + /* Prevent any rate messages upon its usage */ 729 + ratelimit_set_flags(&migf->pre_copy_rl_state, 730 + RATELIMIT_MSG_ON_RELEASE); 731 + migf->state = VIRTIOVF_MIGF_STATE_PRECOPY; 732 + } else { 733 + migf->state = VIRTIOVF_MIGF_STATE_COMPLETE; 734 + } 735 + 736 + return migf; 737 + 738 + out_clean: 739 + virtiovf_clean_migf_resources(migf); 740 + out: 741 + fput(migf->filp); 742 + return ERR_PTR(ret); 743 + } 744 + 745 + /* 746 + * Set the required object header at the beginning of the buffer. 747 + * The actual device parts data will be written post of the header offset. 748 + */ 749 + static int virtiovf_set_obj_cmd_header(struct virtiovf_data_buffer *vhca_buf) 750 + { 751 + struct virtio_admin_cmd_resource_obj_cmd_hdr obj_hdr = {}; 752 + struct page *page; 753 + u8 *to_buff; 754 + 755 + obj_hdr.type = cpu_to_le16(VIRTIO_RESOURCE_OBJ_DEV_PARTS); 756 + obj_hdr.id = cpu_to_le32(vhca_buf->migf->obj_id); 757 + page = virtiovf_get_migration_page(vhca_buf, 0); 758 + if (!page) 759 + return -EINVAL; 760 + to_buff = kmap_local_page(page); 761 + memcpy(to_buff, &obj_hdr, sizeof(obj_hdr)); 762 + kunmap_local(to_buff); 763 + 764 + /* Mark the buffer as including the header object data */ 765 + vhca_buf->include_header_object = 1; 766 + return 0; 767 + } 768 + 769 + static int 770 + virtiovf_append_page_to_mig_buf(struct virtiovf_data_buffer *vhca_buf, 771 + const char __user **buf, size_t *len, 772 + loff_t *pos, ssize_t *done) 773 + { 774 + unsigned long offset; 775 + size_t page_offset; 776 + struct page *page; 777 + size_t page_len; 778 + u8 *to_buff; 779 + int ret; 780 + 781 + offset = *pos - vhca_buf->start_pos; 782 + 
783 + if (vhca_buf->include_header_object) 784 + /* The buffer holds the object header, update the offset accordingly */ 785 + offset += sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr); 786 + 787 + page_offset = offset % PAGE_SIZE; 788 + 789 + page = virtiovf_get_migration_page(vhca_buf, offset - page_offset); 790 + if (!page) 791 + return -EINVAL; 792 + 793 + page_len = min_t(size_t, *len, PAGE_SIZE - page_offset); 794 + to_buff = kmap_local_page(page); 795 + ret = copy_from_user(to_buff + page_offset, *buf, page_len); 796 + kunmap_local(to_buff); 797 + if (ret) 798 + return -EFAULT; 799 + 800 + *pos += page_len; 801 + *done += page_len; 802 + *buf += page_len; 803 + *len -= page_len; 804 + vhca_buf->length += page_len; 805 + return 0; 806 + } 807 + 808 + static ssize_t 809 + virtiovf_resume_read_chunk(struct virtiovf_migration_file *migf, 810 + struct virtiovf_data_buffer *vhca_buf, 811 + size_t chunk_size, const char __user **buf, 812 + size_t *len, loff_t *pos, ssize_t *done, 813 + bool *has_work) 814 + { 815 + size_t copy_len, to_copy; 816 + int ret; 817 + 818 + to_copy = min_t(size_t, *len, chunk_size - vhca_buf->length); 819 + copy_len = to_copy; 820 + while (to_copy) { 821 + ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, 822 + pos, done); 823 + if (ret) 824 + return ret; 825 + } 826 + 827 + *len -= copy_len; 828 + if (vhca_buf->length == chunk_size) { 829 + migf->load_state = VIRTIOVF_LOAD_STATE_LOAD_CHUNK; 830 + migf->max_pos += chunk_size; 831 + *has_work = true; 832 + } 833 + 834 + return 0; 835 + } 836 + 837 + static int 838 + virtiovf_resume_read_header_data(struct virtiovf_migration_file *migf, 839 + struct virtiovf_data_buffer *vhca_buf, 840 + const char __user **buf, size_t *len, 841 + loff_t *pos, ssize_t *done) 842 + { 843 + size_t copy_len, to_copy; 844 + size_t required_data; 845 + int ret; 846 + 847 + required_data = migf->record_size - vhca_buf->length; 848 + to_copy = min_t(size_t, *len, required_data); 849 + copy_len = 
to_copy; 850 + while (to_copy) { 851 + ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, 852 + pos, done); 853 + if (ret) 854 + return ret; 855 + } 856 + 857 + *len -= copy_len; 858 + if (vhca_buf->length == migf->record_size) { 859 + switch (migf->record_tag) { 860 + default: 861 + /* Optional tag */ 862 + break; 863 + } 864 + 865 + migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER; 866 + migf->max_pos += migf->record_size; 867 + vhca_buf->length = 0; 868 + } 869 + 870 + return 0; 871 + } 872 + 873 + static int 874 + virtiovf_resume_read_header(struct virtiovf_migration_file *migf, 875 + struct virtiovf_data_buffer *vhca_buf, 876 + const char __user **buf, 877 + size_t *len, loff_t *pos, 878 + ssize_t *done, bool *has_work) 879 + { 880 + struct page *page; 881 + size_t copy_len; 882 + u8 *to_buff; 883 + int ret; 884 + 885 + copy_len = min_t(size_t, *len, 886 + sizeof(struct virtiovf_migration_header) - vhca_buf->length); 887 + page = virtiovf_get_migration_page(vhca_buf, 0); 888 + if (!page) 889 + return -EINVAL; 890 + to_buff = kmap_local_page(page); 891 + ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len); 892 + if (ret) { 893 + ret = -EFAULT; 894 + goto end; 895 + } 896 + 897 + *buf += copy_len; 898 + *pos += copy_len; 899 + *done += copy_len; 900 + *len -= copy_len; 901 + vhca_buf->length += copy_len; 902 + if (vhca_buf->length == sizeof(struct virtiovf_migration_header)) { 903 + u64 record_size; 904 + u32 flags; 905 + 906 + record_size = le64_to_cpup((__le64 *)to_buff); 907 + if (record_size > MAX_LOAD_SIZE) { 908 + ret = -ENOMEM; 909 + goto end; 910 + } 911 + 912 + migf->record_size = record_size; 913 + flags = le32_to_cpup((__le32 *)(to_buff + 914 + offsetof(struct virtiovf_migration_header, flags))); 915 + migf->record_tag = le32_to_cpup((__le32 *)(to_buff + 916 + offsetof(struct virtiovf_migration_header, tag))); 917 + switch (migf->record_tag) { 918 + case VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA: 919 + migf->load_state = 
VIRTIOVF_LOAD_STATE_PREP_CHUNK; 920 + break; 921 + default: 922 + if (!(flags & VIRTIOVF_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) { 923 + ret = -EOPNOTSUPP; 924 + goto end; 925 + } 926 + /* We may read and skip this optional record data */ 927 + migf->load_state = VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA; 928 + } 929 + 930 + migf->max_pos += vhca_buf->length; 931 + vhca_buf->length = 0; 932 + *has_work = true; 933 + } 934 + end: 935 + kunmap_local(to_buff); 936 + return ret; 937 + } 938 + 939 + static ssize_t virtiovf_resume_write(struct file *filp, const char __user *buf, 940 + size_t len, loff_t *pos) 941 + { 942 + struct virtiovf_migration_file *migf = filp->private_data; 943 + struct virtiovf_data_buffer *vhca_buf = migf->buf; 944 + struct virtiovf_data_buffer *vhca_buf_header = migf->buf_header; 945 + unsigned int orig_length; 946 + bool has_work = false; 947 + ssize_t done = 0; 948 + int ret = 0; 949 + 950 + if (pos) 951 + return -ESPIPE; 952 + 953 + pos = &filp->f_pos; 954 + if (*pos < vhca_buf->start_pos) 955 + return -EINVAL; 956 + 957 + mutex_lock(&migf->virtvdev->state_mutex); 958 + mutex_lock(&migf->lock); 959 + if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) { 960 + done = -ENODEV; 961 + goto out_unlock; 962 + } 963 + 964 + while (len || has_work) { 965 + has_work = false; 966 + switch (migf->load_state) { 967 + case VIRTIOVF_LOAD_STATE_READ_HEADER: 968 + ret = virtiovf_resume_read_header(migf, vhca_buf_header, &buf, 969 + &len, pos, &done, &has_work); 970 + if (ret) 971 + goto out_unlock; 972 + break; 973 + case VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA: 974 + if (vhca_buf_header->allocated_length < migf->record_size) { 975 + virtiovf_free_data_buffer(vhca_buf_header); 976 + 977 + migf->buf_header = virtiovf_alloc_data_buffer(migf, 978 + migf->record_size); 979 + if (IS_ERR(migf->buf_header)) { 980 + ret = PTR_ERR(migf->buf_header); 981 + migf->buf_header = NULL; 982 + goto out_unlock; 983 + } 984 + 985 + vhca_buf_header = migf->buf_header; 986 + } 987 + 988 + 
vhca_buf_header->start_pos = migf->max_pos; 989 + migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER_DATA; 990 + break; 991 + case VIRTIOVF_LOAD_STATE_READ_HEADER_DATA: 992 + ret = virtiovf_resume_read_header_data(migf, vhca_buf_header, 993 + &buf, &len, pos, &done); 994 + if (ret) 995 + goto out_unlock; 996 + break; 997 + case VIRTIOVF_LOAD_STATE_PREP_CHUNK: 998 + { 999 + u32 cmd_size = migf->record_size + 1000 + sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr); 1001 + 1002 + /* 1003 + * The DMA map/unmap is managed in virtio layer, we just need to extend 1004 + * the SG pages to hold the extra required chunk data. 1005 + */ 1006 + if (vhca_buf->allocated_length < cmd_size) { 1007 + ret = virtiovf_add_migration_pages(vhca_buf, 1008 + DIV_ROUND_UP_ULL(cmd_size - vhca_buf->allocated_length, 1009 + PAGE_SIZE)); 1010 + if (ret) 1011 + goto out_unlock; 1012 + } 1013 + 1014 + vhca_buf->start_pos = migf->max_pos; 1015 + migf->load_state = VIRTIOVF_LOAD_STATE_READ_CHUNK; 1016 + break; 1017 + } 1018 + case VIRTIOVF_LOAD_STATE_READ_CHUNK: 1019 + ret = virtiovf_resume_read_chunk(migf, vhca_buf, migf->record_size, 1020 + &buf, &len, pos, &done, &has_work); 1021 + if (ret) 1022 + goto out_unlock; 1023 + break; 1024 + case VIRTIOVF_LOAD_STATE_LOAD_CHUNK: 1025 + /* Mark the last SG entry and set its length */ 1026 + sg_mark_end(vhca_buf->last_offset_sg); 1027 + orig_length = vhca_buf->last_offset_sg->length; 1028 + /* Length should include the resource object command header */ 1029 + vhca_buf->last_offset_sg->length = vhca_buf->length + 1030 + sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr) - 1031 + vhca_buf->last_offset; 1032 + ret = virtio_pci_admin_dev_parts_set(migf->virtvdev->core_device.pdev, 1033 + vhca_buf->table.sgt.sgl); 1034 + /* Restore the original SG data */ 1035 + vhca_buf->last_offset_sg->length = orig_length; 1036 + sg_unmark_end(vhca_buf->last_offset_sg); 1037 + if (ret) 1038 + goto out_unlock; 1039 + migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER; 
1040 + /* be ready for reading the next chunk */ 1041 + vhca_buf->length = 0; 1042 + break; 1043 + default: 1044 + break; 1045 + } 1046 + } 1047 + 1048 + out_unlock: 1049 + if (ret) 1050 + migf->state = VIRTIOVF_MIGF_STATE_ERROR; 1051 + mutex_unlock(&migf->lock); 1052 + virtiovf_state_mutex_unlock(migf->virtvdev); 1053 + return ret ? ret : done; 1054 + } 1055 + /* File operations of the resume-side migration file: only write and release handlers are provided. */ 1056 + static const struct file_operations virtiovf_resume_fops = { 1057 + .owner = THIS_MODULE, 1058 + .write = virtiovf_resume_write, 1059 + .release = virtiovf_release_file, 1060 + }; 1061 + /* Allocate and set up the migration file used for resuming: an O_WRONLY anon-inode file, a data buffer, a header buffer and a DEV_PARTS_TYPE_SET object id. Returns the new migf or an ERR_PTR(). */ 1062 + static struct virtiovf_migration_file * 1063 + virtiovf_pci_resume_device_data(struct virtiovf_pci_core_device *virtvdev) 1064 + { 1065 + struct virtiovf_migration_file *migf; 1066 + struct virtiovf_data_buffer *buf; 1067 + u32 obj_id; 1068 + int ret; 1069 + 1070 + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); 1071 + if (!migf) 1072 + return ERR_PTR(-ENOMEM); 1073 + 1074 + migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_resume_fops, migf, 1075 + O_WRONLY); 1076 + if (IS_ERR(migf->filp)) { 1077 + ret = PTR_ERR(migf->filp); 1078 + kfree(migf); 1079 + return ERR_PTR(ret); 1080 + } 1081 + 1082 + stream_open(migf->filp->f_inode, migf->filp); 1083 + mutex_init(&migf->lock); 1084 + INIT_LIST_HEAD(&migf->buf_list); 1085 + INIT_LIST_HEAD(&migf->avail_list); 1086 + spin_lock_init(&migf->list_lock); 1087 + /* The data buffer is allocated first, then a separate buffer sized for the migration header. */ 1088 + buf = virtiovf_alloc_data_buffer(migf, VIRTIOVF_TARGET_INITIAL_BUF_SIZE); 1089 + if (IS_ERR(buf)) { 1090 + ret = PTR_ERR(buf); 1091 + goto out; 1092 + } 1093 + 1094 + migf->buf = buf; 1095 + 1096 + buf = virtiovf_alloc_data_buffer(migf, 1097 + sizeof(struct virtiovf_migration_header)); 1098 + if (IS_ERR(buf)) { 1099 + ret = PTR_ERR(buf); 1100 + goto out_clean; 1101 + } 1102 + 1103 + migf->buf_header = buf; 1104 + migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER; 1105 + 1106 + migf->virtvdev = virtvdev; 1107 + ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET, 1108 + 
&obj_id); 1109 + if (ret) 1110 + goto out_clean; 1111 + 1112 + migf->obj_id = obj_id; 1113 + /* Mark as having a valid obj id which can be even 0 */ 1114 + migf->has_obj_id = true; 1115 + ret = virtiovf_set_obj_cmd_header(migf->buf); 1116 + if (ret) 1117 + goto out_clean; 1118 + 1119 + return migf; 1120 + /* Unwind: migf is not kfree()d directly here; presumably it is released through the file's .release handler once fput() drops the last reference - TODO confirm. */ 1121 + out_clean: 1122 + virtiovf_clean_migf_resources(migf); 1123 + out: 1124 + fput(migf->filp); 1125 + return ERR_PTR(ret); 1126 + } 1127 + /* Perform one migration state arc from virtvdev->mig_state to @new. Returns the migration file for arcs that create one, NULL for arcs that do not, or an ERR_PTR() on failure. The visible caller holds state_mutex. */ 1128 + static struct file * 1129 + virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev, 1130 + u32 new) 1131 + { 1132 + u32 cur = virtvdev->mig_state; 1133 + int ret; 1134 + 1135 + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) { 1136 + /* NOP */ 1137 + return NULL; 1138 + } 1139 + 1140 + if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) { 1141 + /* NOP */ 1142 + return NULL; 1143 + } 1144 + /* Entering a P2P state: set the STOPPED mode flag on the device. */ 1145 + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) || 1146 + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { 1147 + ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 1148 + BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED)); 1149 + if (ret) 1150 + return ERR_PTR(ret); 1151 + return NULL; 1152 + } 1153 + /* Leaving a P2P state: clear all mode flags. */ 1154 + if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) || 1155 + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) { 1156 + ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 0); 1157 + if (ret) 1158 + return ERR_PTR(ret); 1159 + return NULL; 1160 + } 1161 + /* Arcs that create a file take an extra reference with get_file(): the migf stays tracked in the device while its filp is also returned to the caller. */ 1162 + if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) { 1163 + struct virtiovf_migration_file *migf; 1164 + 1165 + migf = virtiovf_pci_save_device_data(virtvdev, false); 1166 + if (IS_ERR(migf)) 1167 + return ERR_CAST(migf); 1168 + get_file(migf->filp); 1169 + virtvdev->saving_migf = migf; 1170 + return migf->filp; 1171 + } 1172 + 1173 + if 
((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) || 1174 + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || 1175 + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) { 1176 + virtiovf_disable_fds(virtvdev); 1177 + return NULL; 1178 + } 1179 + 1180 + if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) { 1181 + struct virtiovf_migration_file *migf; 1182 + 1183 + migf = virtiovf_pci_resume_device_data(virtvdev); 1184 + if (IS_ERR(migf)) 1185 + return ERR_CAST(migf); 1186 + get_file(migf->filp); 1187 + virtvdev->resuming_migf = migf; 1188 + return migf->filp; 1189 + } 1190 + 1191 + if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { 1192 + virtiovf_disable_fds(virtvdev); 1193 + return NULL; 1194 + } 1195 + /* Entering PRE_COPY: like the STOP -> STOP_COPY arc, but virtiovf_pci_save_device_data() is called with true as its second argument (presumably the pre-copy flag - TODO confirm). */ 1196 + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) || 1197 + (cur == VFIO_DEVICE_STATE_RUNNING_P2P && 1198 + new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { 1199 + struct virtiovf_migration_file *migf; 1200 + 1201 + migf = virtiovf_pci_save_device_data(virtvdev, true); 1202 + if (IS_ERR(migf)) 1203 + return ERR_CAST(migf); 1204 + get_file(migf->filp); 1205 + virtvdev->saving_migf = migf; 1206 + return migf->filp; 1207 + } 1208 + 1209 + if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) { 1210 + ret = virtiovf_pci_save_device_final_data(virtvdev); 1211 + return ret ? 
ERR_PTR(ret) : NULL; 1212 + } 1213 + 1214 + /* 1215 + * vfio_mig_get_next_state() does not use arcs other than the above 1216 + */ 1217 + WARN_ON(true); 1218 + return ERR_PTR(-EINVAL); 1219 + } 1220 + /* .migration_set_state handler: under state_mutex, walk from the current state to @new_state one arc at a time as dictated by vfio_mig_get_next_state(). Returns the result of the last arc performed (a file or NULL) or an ERR_PTR(). */ 1221 + static struct file * 1222 + virtiovf_pci_set_device_state(struct vfio_device *vdev, 1223 + enum vfio_device_mig_state new_state) 1224 + { 1225 + struct virtiovf_pci_core_device *virtvdev = container_of( 1226 + vdev, struct virtiovf_pci_core_device, core_device.vdev); 1227 + enum vfio_device_mig_state next_state; 1228 + struct file *res = NULL; 1229 + int ret; 1230 + 1231 + mutex_lock(&virtvdev->state_mutex); 1232 + while (new_state != virtvdev->mig_state) { 1233 + ret = vfio_mig_get_next_state(vdev, virtvdev->mig_state, 1234 + new_state, &next_state); 1235 + if (ret) { 1236 + res = ERR_PTR(ret); 1237 + break; 1238 + } 1239 + res = virtiovf_pci_step_device_state_locked(virtvdev, next_state); 1240 + if (IS_ERR(res)) 1241 + break; 1242 + virtvdev->mig_state = next_state; /* A file may only be produced by the final arc; an intermediate arc returning one is treated as a bug and the file is dropped. */ 1243 + if (WARN_ON(res && new_state != virtvdev->mig_state)) { 1244 + fput(res); 1245 + res = ERR_PTR(-EINVAL); 1246 + break; 1247 + } 1248 + } 1249 + virtiovf_state_mutex_unlock(virtvdev); 1250 + return res; 1251 + } 1252 + /* .migration_get_state handler: report the current mig_state under state_mutex. Always returns 0. */ 1253 + static int virtiovf_pci_get_device_state(struct vfio_device *vdev, 1254 + enum vfio_device_mig_state *curr_state) 1255 + { 1256 + struct virtiovf_pci_core_device *virtvdev = container_of( 1257 + vdev, struct virtiovf_pci_core_device, core_device.vdev); 1258 + 1259 + mutex_lock(&virtvdev->state_mutex); 1260 + *curr_state = virtvdev->mig_state; 1261 + virtiovf_state_mutex_unlock(virtvdev); 1262 + return 0; 1263 + } 1264 + /* .migration_get_data_size handler: query the device parts SIZE metadata and store it in *stop_copy_length. Reuses the saving migf's object id when one exists; otherwise a temporary GET object is created and freed before returning. Returns 0 or a negative error. */ 1265 + static int virtiovf_pci_get_data_size(struct vfio_device *vdev, 1266 + unsigned long *stop_copy_length) 1267 + { 1268 + struct virtiovf_pci_core_device *virtvdev = container_of( 1269 + vdev, struct virtiovf_pci_core_device, core_device.vdev); 1270 + bool obj_id_exists; 1271 + u32 res_size; 1272 + u32 obj_id; 1273 + int ret; 1274 + 1275 + 
mutex_lock(&virtvdev->state_mutex); 1276 + obj_id_exists = virtvdev->saving_migf && virtvdev->saving_migf->has_obj_id; 1277 + if (!obj_id_exists) { 1278 + ret = virtiovf_pci_alloc_obj_id(virtvdev, 1279 + VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET, 1280 + &obj_id); 1281 + if (ret) 1282 + goto end; 1283 + } else { 1284 + obj_id = virtvdev->saving_migf->obj_id; 1285 + } 1286 + 1287 + ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev, 1288 + VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id, 1289 + VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE, 1290 + &res_size); 1291 + if (!ret) 1292 + *stop_copy_length = res_size; 1293 + 1294 + /* 1295 + * We can't leave this obj_id alive if it didn't exist before; otherwise it might 1296 + * stay alive even without an active migration flow (e.g. migration was cancelled) 1297 + */ 1298 + if (!obj_id_exists) 1299 + virtiovf_pci_free_obj_id(virtvdev, obj_id); 1300 + end: 1301 + virtiovf_state_mutex_unlock(virtvdev); 1302 + return ret; 1303 + } 1304 + /* Migration ops table installed by virtiovf_set_migratable(). */ 1305 + static const struct vfio_migration_ops virtvdev_pci_mig_ops = { 1306 + .migration_set_state = virtiovf_pci_set_device_state, 1307 + .migration_get_state = virtiovf_pci_get_device_state, 1308 + .migration_get_data_size = virtiovf_pci_get_data_size, 1309 + }; 1310 + /* Mark the device as migration capable: initialize state_mutex and reset_lock, advertise STOP_COPY, P2P and PRE_COPY, and install the migration ops. */ 1311 + void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev) 1312 + { 1313 + virtvdev->migrate_cap = 1; 1314 + mutex_init(&virtvdev->state_mutex); 1315 + spin_lock_init(&virtvdev->reset_lock); 1316 + virtvdev->core_device.vdev.migration_flags = 1317 + VFIO_MIGRATION_STOP_COPY | 1318 + VFIO_MIGRATION_P2P | 1319 + VFIO_MIGRATION_PRE_COPY; 1320 + virtvdev->core_device.vdev.mig_ops = &virtvdev_pci_mig_ops; 1321 + } 1322 + /* Set the migration state to RUNNING; no-op when the device is not migration capable. */ 1323 + void virtiovf_open_migration(struct virtiovf_pci_core_device *virtvdev) 1324 + { 1325 + if (!virtvdev->migrate_cap) 1326 + return; 1327 + 1328 + virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; 1329 + } 1330 + /* Call virtiovf_disable_fds() for the device; no-op when the device is not migration capable. */ 1331 + void virtiovf_close_migration(struct virtiovf_pci_core_device 
*virtvdev) 1332 + { 1333 + if (!virtvdev->migrate_cap) 1334 + return; 1335 + 1336 + virtiovf_disable_fds(virtvdev); 1337 + }
+17 -2
drivers/virtio/virtio_pci_common.h
··· 48 48 /* Protects virtqueue access. */ 49 49 spinlock_t lock; 50 50 u64 supported_cmds; 51 + u64 supported_caps; 52 + u8 max_dev_parts_objects; 53 + struct ida dev_parts_ida; 51 54 /* Name of the admin queue: avq.$vq_index. */ 52 55 char name[10]; 53 56 u16 vq_index; ··· 170 167 BIT_ULL(VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ) | \ 171 168 BIT_ULL(VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO)) 172 169 170 + /* Admin commands that must all be supported for the device parts functionality (checked as a full mask by virtio_pci_admin_has_dev_parts()). */ #define VIRTIO_DEV_PARTS_ADMIN_CMD_BITMAP \ 171 + (BIT_ULL(VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY) | \ 172 + BIT_ULL(VIRTIO_ADMIN_CMD_DRIVER_CAP_SET) | \ 173 + BIT_ULL(VIRTIO_ADMIN_CMD_DEVICE_CAP_GET) | \ 174 + BIT_ULL(VIRTIO_ADMIN_CMD_RESOURCE_OBJ_CREATE) | \ 175 + BIT_ULL(VIRTIO_ADMIN_CMD_RESOURCE_OBJ_DESTROY) | \ 176 + BIT_ULL(VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_GET) | \ 177 + BIT_ULL(VIRTIO_ADMIN_CMD_DEV_PARTS_GET) | \ 178 + BIT_ULL(VIRTIO_ADMIN_CMD_DEV_PARTS_SET) | \ 179 + BIT_ULL(VIRTIO_ADMIN_CMD_DEV_MODE_SET)) 180 + 173 181 /* Unlike modern drivers which support hardware virtio devices, legacy drivers 174 182 * assume software-based devices: e.g. they don't use proper memory barriers 175 183 * on ARM, use big endian on PPC, etc. X86 drivers are mostly ok though, more 176 184 * or less by chance. For now, only support legacy IO on X86. 177 185 */ 178 186 #ifdef CONFIG_VIRTIO_PCI_ADMIN_LEGACY 179 - #define VIRTIO_ADMIN_CMD_BITMAP VIRTIO_LEGACY_ADMIN_CMD_BITMAP 187 + #define VIRTIO_ADMIN_CMD_BITMAP (VIRTIO_LEGACY_ADMIN_CMD_BITMAP | \ 188 + VIRTIO_DEV_PARTS_ADMIN_CMD_BITMAP) 180 189 #else 181 - #define VIRTIO_ADMIN_CMD_BITMAP 0 190 + /* Without legacy support the device parts commands are still included in the bitmap. */ #define VIRTIO_ADMIN_CMD_BITMAP VIRTIO_DEV_PARTS_ADMIN_CMD_BITMAP 182 191 #endif 183 192 184 193 bool vp_is_avq(struct virtio_device *vdev, unsigned int index);
+456 -1
drivers/virtio/virtio_pci_modern.c
··· 15 15 */ 16 16 17 17 #include <linux/delay.h> 18 + #include <linux/virtio_pci_admin.h> 18 19 #define VIRTIO_PCI_NO_LEGACY 19 20 #define VIRTIO_RING_NO_LEGACY 20 21 #include "virtio_pci_common.h" ··· 55 54 spin_lock_irqsave(&admin_vq->lock, flags); 56 55 do { 57 56 virtqueue_disable_cb(vq); 58 - while ((cmd = virtqueue_get_buf(vq, &len))) 57 + while ((cmd = virtqueue_get_buf(vq, &len))) { 58 + cmd->result_sg_size = len; 59 59 complete(&cmd->completion); 60 + } 60 61 } while (!virtqueue_enable_cb(vq)); 61 62 spin_unlock_irqrestore(&admin_vq->lock, flags); 62 63 } ··· 221 218 kfree(data); 222 219 } 223 220 221 + /* Read the device's VIRTIO_DEV_PARTS_CAP limits, clamp the GET and SET limits to their minimum, write that back with DRIVER_CAP_SET and, only if both commands succeed, initialize the IDA and record max_dev_parts_objects. Failures are silent (void return). */ static void 222 + virtio_pci_admin_cmd_dev_parts_objects_enable(struct virtio_device *virtio_dev) 223 + { 224 + struct virtio_pci_device *vp_dev = to_vp_device(virtio_dev); 225 + struct virtio_admin_cmd_cap_get_data *get_data; 226 + struct virtio_admin_cmd_cap_set_data *set_data; 227 + struct virtio_dev_parts_cap *result; 228 + struct virtio_admin_cmd cmd = {}; 229 + struct scatterlist result_sg; 230 + struct scatterlist data_sg; 231 + u8 resource_objects_limit; 232 + u16 set_data_size; 233 + int ret; 234 + 235 + get_data = kzalloc(sizeof(*get_data), GFP_KERNEL); 236 + if (!get_data) 237 + return; 238 + 239 + result = kzalloc(sizeof(*result), GFP_KERNEL); 240 + if (!result) 241 + goto end; 242 + 243 + get_data->id = cpu_to_le16(VIRTIO_DEV_PARTS_CAP); 244 + sg_init_one(&data_sg, get_data, sizeof(*get_data)); 245 + sg_init_one(&result_sg, result, sizeof(*result)); 246 + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_DEVICE_CAP_GET); 247 + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); 248 + cmd.data_sg = &data_sg; 249 + cmd.result_sg = &result_sg; 250 + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); 251 + if (ret) 252 + goto err_get; 253 + 254 + set_data_size = sizeof(*set_data) + sizeof(*result); 255 + set_data = kzalloc(set_data_size, GFP_KERNEL); 256 + if (!set_data) 257 + goto err_get; 258 + 259 + set_data->id = 
cpu_to_le16(VIRTIO_DEV_PARTS_CAP); 260 + 261 + /* Set the limit to the minimum value between the GET and SET values 262 + * supported by the device. Since the obj_id for VIRTIO_DEV_PARTS_CAP 263 + * is a globally unique value per PF, there is no possibility of 264 + * overlap between GET and SET operations. 265 + */ 266 + resource_objects_limit = min(result->get_parts_resource_objects_limit, 267 + result->set_parts_resource_objects_limit); 268 + result->get_parts_resource_objects_limit = resource_objects_limit; 269 + result->set_parts_resource_objects_limit = resource_objects_limit; 270 + memcpy(set_data->cap_specific_data, result, sizeof(*result)); 271 + sg_init_one(&data_sg, set_data, set_data_size); 272 + cmd.data_sg = &data_sg; 273 + cmd.result_sg = NULL; 274 + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_DRIVER_CAP_SET); 275 + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); 276 + if (ret) 277 + goto err_set; 278 + 279 + /* Initialize the IDA that manages the dev parts object ids */ 280 + ida_init(&vp_dev->admin_vq.dev_parts_ida); 281 + vp_dev->admin_vq.max_dev_parts_objects = resource_objects_limit; 282 + /* Success also falls through the labels below, so the temporary buffers are freed on every path. */ 283 + err_set: 284 + kfree(set_data); 285 + err_get: 286 + kfree(result); 287 + end: 288 + kfree(get_data); 289 + } 290 + /* Query the capability ids supported by the device, cache the first 64 bits in admin_vq.supported_caps and, when VIRTIO_DEV_PARTS_CAP is among them, enable the dev parts objects. */ 291 + static void virtio_pci_admin_cmd_cap_init(struct virtio_device *virtio_dev) 292 + { 293 + struct virtio_pci_device *vp_dev = to_vp_device(virtio_dev); 294 + struct virtio_admin_cmd_query_cap_id_result *data; 295 + struct virtio_admin_cmd cmd = {}; 296 + struct scatterlist result_sg; 297 + int ret; 298 + 299 + data = kzalloc(sizeof(*data), GFP_KERNEL); 300 + if (!data) 301 + return; 302 + 303 + sg_init_one(&result_sg, data, sizeof(*data)); 304 + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY); 305 + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); 306 + cmd.result_sg = &result_sg; 307 + 308 + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); 309 + if (ret) 310 + goto end; 311 + 312 + /* Max number of caps fits into a single 
u64 */ 313 + BUILD_BUG_ON(sizeof(data->supported_caps) > sizeof(u64)); 314 + 315 + vp_dev->admin_vq.supported_caps = le64_to_cpu(data->supported_caps[0]); 316 + /* NOTE(review): the mask below is an int-typed shift tested against the u64 supported_caps; harmless while VIRTIO_DEV_PARTS_CAP is 0, but BIT_ULL() would be needed for cap ids of 31 and above - confirm. */ 317 + if (!(vp_dev->admin_vq.supported_caps & (1 << VIRTIO_DEV_PARTS_CAP))) 318 + goto end; 319 + 320 + virtio_pci_admin_cmd_dev_parts_objects_enable(virtio_dev); 321 + end: 322 + kfree(data); 323 + } 324 + 224 325 static void vp_modern_avq_activate(struct virtio_device *vdev) 225 326 { 226 327 if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) 227 328 return; 228 329 229 330 virtio_pci_admin_cmd_list_init(vdev); 331 + virtio_pci_admin_cmd_cap_init(vdev); 230 332 } 231 333 232 334 static void vp_modern_avq_cleanup(struct virtio_device *vdev) ··· 865 757 866 758 return true; 867 759 } 760 + 761 + /* 762 + * virtio_pci_admin_has_dev_parts - Checks whether the device parts 763 + * functionality is supported 764 + * @pdev: VF pci_dev 765 + * 766 + * Returns true when the PF has an admin VQ, supports every command in VIRTIO_DEV_PARTS_ADMIN_CMD_BITMAP and has a non-zero max_dev_parts_objects; false otherwise. 767 + */ 768 + bool virtio_pci_admin_has_dev_parts(struct pci_dev *pdev) 769 + { 770 + struct virtio_device *virtio_dev = virtio_pci_vf_get_pf_dev(pdev); 771 + struct virtio_pci_device *vp_dev; 772 + 773 + if (!virtio_dev) 774 + return false; 775 + 776 + if (!virtio_has_feature(virtio_dev, VIRTIO_F_ADMIN_VQ)) 777 + return false; 778 + 779 + vp_dev = to_vp_device(virtio_dev); 780 + 781 + if (!((vp_dev->admin_vq.supported_cmds & VIRTIO_DEV_PARTS_ADMIN_CMD_BITMAP) == 782 + VIRTIO_DEV_PARTS_ADMIN_CMD_BITMAP)) 783 + return false; 784 + 785 + return vp_dev->admin_vq.max_dev_parts_objects; 786 + } 787 + EXPORT_SYMBOL_GPL(virtio_pci_admin_has_dev_parts); 788 + 789 + /* 790 + * virtio_pci_admin_mode_set - Sets the mode of a member device 791 + * @pdev: VF pci_dev 792 + * @flags: device mode's flags 793 + * 794 + * Note: caller must serialize access for the given device. 795 + * Returns 0 on success, or negative on failure. 
796 + */ 797 + int virtio_pci_admin_mode_set(struct pci_dev *pdev, u8 flags) 798 + { 799 + struct virtio_device *virtio_dev = virtio_pci_vf_get_pf_dev(pdev); 800 + struct virtio_admin_cmd_dev_mode_set_data *data; 801 + struct virtio_admin_cmd cmd = {}; 802 + struct scatterlist data_sg; 803 + int vf_id; 804 + int ret; 805 + 806 + if (!virtio_dev) 807 + return -ENODEV; 808 + 809 + vf_id = pci_iov_vf_id(pdev); 810 + if (vf_id < 0) 811 + return vf_id; 812 + 813 + data = kzalloc(sizeof(*data), GFP_KERNEL); 814 + if (!data) 815 + return -ENOMEM; 816 + 817 + data->flags = flags; 818 + sg_init_one(&data_sg, data, sizeof(*data)); 819 + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_DEV_MODE_SET); 820 + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); 821 + /* NOTE(review): vf_id + 1 - presumably group member ids are 1-based; confirm against the virtio spec. The same pattern is used by all the per-VF commands below. */ cmd.group_member_id = cpu_to_le64(vf_id + 1); 822 + cmd.data_sg = &data_sg; 823 + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); 824 + 825 + kfree(data); 826 + return ret; 827 + } 828 + EXPORT_SYMBOL_GPL(virtio_pci_admin_mode_set); 829 + 830 + /* 831 + * virtio_pci_admin_obj_create - Creates an object for a given type and operation, 832 + * following the max objects that can be created for that request. 833 + * @pdev: VF pci_dev 834 + * @obj_type: Object type 835 + * @operation_type: Operation type 836 + * @obj_id: Output unique object id 837 + * 838 + * Note: caller must serialize access for the given device. 839 + * Returns 0 on success, or negative on failure. 
840 + */ 841 + int virtio_pci_admin_obj_create(struct pci_dev *pdev, u16 obj_type, u8 operation_type, 842 + u32 *obj_id) 843 + { 844 + struct virtio_device *virtio_dev = virtio_pci_vf_get_pf_dev(pdev); 845 + u16 data_size = sizeof(struct virtio_admin_cmd_resource_obj_create_data); 846 + struct virtio_admin_cmd_resource_obj_create_data *obj_create_data; 847 + struct virtio_resource_obj_dev_parts obj_dev_parts = {}; 848 + struct virtio_pci_admin_vq *avq; 849 + struct virtio_admin_cmd cmd = {}; 850 + struct scatterlist data_sg; 851 + void *data; 852 + int id = -1; 853 + int vf_id; 854 + int ret; 855 + 856 + if (!virtio_dev) 857 + return -ENODEV; 858 + 859 + vf_id = pci_iov_vf_id(pdev); 860 + if (vf_id < 0) 861 + return vf_id; 862 + 863 + if (obj_type != VIRTIO_RESOURCE_OBJ_DEV_PARTS) 864 + return -EOPNOTSUPP; 865 + 866 + if (operation_type != VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET && 867 + operation_type != VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET) 868 + return -EINVAL; 869 + 870 + avq = &to_vp_device(virtio_dev)->admin_vq; 871 + if (!avq->max_dev_parts_objects) 872 + return -EOPNOTSUPP; 873 + /* Pick a free id in [0, max_dev_parts_objects - 1]; *obj_id is written even if the create command later fails. */ 874 + id = ida_alloc_range(&avq->dev_parts_ida, 0, 875 + avq->max_dev_parts_objects - 1, GFP_KERNEL); 876 + if (id < 0) 877 + return id; 878 + 879 + *obj_id = id; 880 + /* The command payload is the create header followed by the dev parts specific data. */ data_size += sizeof(obj_dev_parts); 881 + data = kzalloc(data_size, GFP_KERNEL); 882 + if (!data) { 883 + ret = -ENOMEM; 884 + goto end; 885 + } 886 + 887 + obj_create_data = data; 888 + obj_create_data->hdr.type = cpu_to_le16(obj_type); 889 + obj_create_data->hdr.id = cpu_to_le32(*obj_id); 890 + obj_dev_parts.type = operation_type; 891 + memcpy(obj_create_data->resource_obj_specific_data, &obj_dev_parts, 892 + sizeof(obj_dev_parts)); 893 + sg_init_one(&data_sg, data, data_size); 894 + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_RESOURCE_OBJ_CREATE); 895 + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); 896 + cmd.group_member_id = cpu_to_le64(vf_id + 1); 897 + cmd.data_sg = &data_sg; 898 + ret = 
vp_modern_admin_cmd_exec(virtio_dev, &cmd); 899 + /* On any failure, including the -ENOMEM path above, the id is returned to the IDA. */ 900 + kfree(data); 901 + end: 902 + if (ret) 903 + ida_free(&avq->dev_parts_ida, id); 904 + 905 + return ret; 906 + } 907 + EXPORT_SYMBOL_GPL(virtio_pci_admin_obj_create); 908 + 909 + /* 910 + * virtio_pci_admin_obj_destroy - Destroys an object of a given type and id 911 + * @pdev: VF pci_dev 912 + * @obj_type: Object type 913 + * @id: Object id 914 + * 915 + * Note: caller must serialize access for the given device. 916 + * Returns 0 on success, or negative on failure. 917 + */ 918 + int virtio_pci_admin_obj_destroy(struct pci_dev *pdev, u16 obj_type, u32 id) 919 + { 920 + struct virtio_device *virtio_dev = virtio_pci_vf_get_pf_dev(pdev); 921 + struct virtio_admin_cmd_resource_obj_cmd_hdr *data; 922 + struct virtio_pci_device *vp_dev; 923 + struct virtio_admin_cmd cmd = {}; 924 + struct scatterlist data_sg; 925 + int vf_id; 926 + int ret; 927 + 928 + if (!virtio_dev) 929 + return -ENODEV; 930 + 931 + vf_id = pci_iov_vf_id(pdev); 932 + if (vf_id < 0) 933 + return vf_id; 934 + 935 + if (obj_type != VIRTIO_RESOURCE_OBJ_DEV_PARTS) 936 + return -EINVAL; 937 + 938 + data = kzalloc(sizeof(*data), GFP_KERNEL); 939 + if (!data) 940 + return -ENOMEM; 941 + 942 + data->type = cpu_to_le16(obj_type); 943 + data->id = cpu_to_le32(id); 944 + sg_init_one(&data_sg, data, sizeof(*data)); 945 + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_RESOURCE_OBJ_DESTROY); 946 + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); 947 + cmd.group_member_id = cpu_to_le64(vf_id + 1); 948 + cmd.data_sg = &data_sg; 949 + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); 950 + /* The id is released from the IDA only when the destroy command succeeded. */ if (!ret) { 951 + vp_dev = to_vp_device(virtio_dev); 952 + ida_free(&vp_dev->admin_vq.dev_parts_ida, id); 953 + } 954 + 955 + kfree(data); 956 + return ret; 957 + } 958 + EXPORT_SYMBOL_GPL(virtio_pci_admin_obj_destroy); 959 + 960 + /* 961 + * virtio_pci_admin_dev_parts_metadata_get - Gets the metadata of the device parts 962 + * identified by the below attributes. 
963 + * @pdev: VF pci_dev 964 + * @obj_type: Object type 965 + * @id: Object id 966 + * @metadata_type: Metadata type 967 + * @out: Upon success holds the output for 'metadata type size' 968 + * 969 + * Note: caller must serialize access for the given device. 970 + * Returns 0 on success, or negative on failure. 971 + */ 972 + int virtio_pci_admin_dev_parts_metadata_get(struct pci_dev *pdev, u16 obj_type, 973 + u32 id, u8 metadata_type, u32 *out) 974 + { 975 + struct virtio_device *virtio_dev = virtio_pci_vf_get_pf_dev(pdev); 976 + struct virtio_admin_cmd_dev_parts_metadata_result *result; 977 + struct virtio_admin_cmd_dev_parts_metadata_data *data; 978 + struct scatterlist data_sg, result_sg; 979 + struct virtio_admin_cmd cmd = {}; 980 + int vf_id; 981 + int ret; 982 + 983 + if (!virtio_dev) 984 + return -ENODEV; 985 + 986 + if (metadata_type != VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE) 987 + return -EOPNOTSUPP; 988 + 989 + vf_id = pci_iov_vf_id(pdev); 990 + if (vf_id < 0) 991 + return vf_id; 992 + 993 + data = kzalloc(sizeof(*data), GFP_KERNEL); 994 + if (!data) 995 + return -ENOMEM; 996 + 997 + result = kzalloc(sizeof(*result), GFP_KERNEL); 998 + if (!result) { 999 + ret = -ENOMEM; 1000 + goto end; 1001 + } 1002 + 1003 + data->hdr.type = cpu_to_le16(obj_type); 1004 + data->hdr.id = cpu_to_le32(id); 1005 + data->type = metadata_type; 1006 + sg_init_one(&data_sg, data, sizeof(*data)); 1007 + sg_init_one(&result_sg, result, sizeof(*result)); 1008 + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_GET); 1009 + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); 1010 + cmd.group_member_id = cpu_to_le64(vf_id + 1); 1011 + cmd.data_sg = &data_sg; 1012 + cmd.result_sg = &result_sg; 1013 + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); 1014 + /* Only the SIZE metadata type is accepted (checked above), so the parts_size member of the result union is the one read. */ if (!ret) 1015 + *out = le32_to_cpu(result->parts_size.size); 1016 + 1017 + kfree(result); 1018 + end: 1019 + kfree(data); 1020 + return ret; 1021 + } 1022 + 
EXPORT_SYMBOL_GPL(virtio_pci_admin_dev_parts_metadata_get); 1023 + 1024 + /* 1025 + * virtio_pci_admin_dev_parts_get - Gets the device parts identified by the below attributes. 1026 + * @pdev: VF pci_dev 1027 + * @obj_type: Object type 1028 + * @id: Object id 1029 + * @get_type: Get type 1030 + * @res_sg: Upon success holds the output result data 1031 + * @res_size: Upon success holds the output result size 1032 + * 1033 + * Note: caller must serialize access for the given device. 1034 + * Returns 0 on success, or negative on failure. 1035 + */ 1036 + int virtio_pci_admin_dev_parts_get(struct pci_dev *pdev, u16 obj_type, u32 id, 1037 + u8 get_type, struct scatterlist *res_sg, 1038 + u32 *res_size) 1039 + { 1040 + struct virtio_device *virtio_dev = virtio_pci_vf_get_pf_dev(pdev); 1041 + struct virtio_admin_cmd_dev_parts_get_data *data; 1042 + struct scatterlist data_sg; 1043 + struct virtio_admin_cmd cmd = {}; 1044 + int vf_id; 1045 + int ret; 1046 + 1047 + if (!virtio_dev) 1048 + return -ENODEV; 1049 + 1050 + if (get_type != VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL) 1051 + return -EOPNOTSUPP; 1052 + 1053 + vf_id = pci_iov_vf_id(pdev); 1054 + if (vf_id < 0) 1055 + return vf_id; 1056 + 1057 + data = kzalloc(sizeof(*data), GFP_KERNEL); 1058 + if (!data) 1059 + return -ENOMEM; 1060 + 1061 + data->hdr.type = cpu_to_le16(obj_type); 1062 + data->hdr.id = cpu_to_le32(id); 1063 + data->type = get_type; 1064 + sg_init_one(&data_sg, data, sizeof(*data)); 1065 + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_DEV_PARTS_GET); 1066 + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); 1067 + cmd.group_member_id = cpu_to_le64(vf_id + 1); 1068 + cmd.data_sg = &data_sg; 1069 + cmd.result_sg = res_sg; 1070 + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); 1071 + /* result_sg_size is the length the admin virtqueue completion loop stored from virtqueue_get_buf(). */ if (!ret) 1072 + *res_size = cmd.result_sg_size; 1073 + 1074 + kfree(data); 1075 + return ret; 1076 + } 1077 + EXPORT_SYMBOL_GPL(virtio_pci_admin_dev_parts_get); 1078 + 1079 + /* 1080 + * virtio_pci_admin_dev_parts_set - 
Sets the device parts identified by the below attributes. 1081 + * @pdev: VF pci_dev 1082 + * @data_sg: The device parts data, its layout follows struct virtio_admin_cmd_dev_parts_set_data 1083 + * 1084 + * Note: caller must serialize access for the given device. 1085 + * Returns 0 on success, or negative on failure. 1086 + */ 1087 + int virtio_pci_admin_dev_parts_set(struct pci_dev *pdev, struct scatterlist *data_sg) 1088 + { 1089 + struct virtio_device *virtio_dev = virtio_pci_vf_get_pf_dev(pdev); 1090 + struct virtio_admin_cmd cmd = {}; 1091 + int vf_id; 1092 + 1093 + if (!virtio_dev) 1094 + return -ENODEV; 1095 + 1096 + vf_id = pci_iov_vf_id(pdev); 1097 + if (vf_id < 0) 1098 + return vf_id; 1099 + /* The caller-provided scatterlist is passed through unchanged as the command data; no result buffer is used. */ 1100 + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_DEV_PARTS_SET); 1101 + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); 1102 + cmd.group_member_id = cpu_to_le64(vf_id + 1); 1103 + cmd.data_sg = data_sg; 1104 + return vp_modern_admin_cmd_exec(virtio_dev, &cmd); 1105 + } 1106 + EXPORT_SYMBOL_GPL(virtio_pci_admin_dev_parts_set); 868 1107 869 1108 static const struct virtio_config_ops virtio_pci_config_nodev_ops = { 870 1109 .get = NULL,
+1
include/linux/virtio.h
··· 120 120 struct scatterlist *data_sg; 121 121 struct scatterlist *result_sg; 122 122 struct completion completion; 123 + u32 result_sg_size; 123 124 int ret; 124 125 }; 125 126
+11
include/linux/virtio_pci_admin.h
··· 20 20 u64 *bar_offset); 21 21 #endif 22 22 23 + /* Device parts admin command APIs; @pdev is the VF pci_dev in all of them. */ bool virtio_pci_admin_has_dev_parts(struct pci_dev *pdev); 24 + /* NOTE(review): the definition names the second parameter 'flags' (documented as @flags); only this prototype calls it 'mode'. */ int virtio_pci_admin_mode_set(struct pci_dev *pdev, u8 mode); 25 + int virtio_pci_admin_obj_create(struct pci_dev *pdev, u16 obj_type, u8 operation_type, 26 + u32 *obj_id); 27 + int virtio_pci_admin_obj_destroy(struct pci_dev *pdev, u16 obj_type, u32 id); 28 + int virtio_pci_admin_dev_parts_metadata_get(struct pci_dev *pdev, u16 obj_type, 29 + u32 id, u8 metadata_type, u32 *out); 30 + int virtio_pci_admin_dev_parts_get(struct pci_dev *pdev, u16 obj_type, u32 id, 31 + u8 get_type, struct scatterlist *res_sg, u32 *res_size); 32 + int virtio_pci_admin_dev_parts_set(struct pci_dev *pdev, struct scatterlist *data_sg); 33 + 23 34 #endif /* _LINUX_VIRTIO_PCI_ADMIN_H */
+131
include/uapi/linux/virtio_pci.h
··· 40 40 #define _LINUX_VIRTIO_PCI_H 41 41 42 42 #include <linux/types.h> 43 + #include <linux/kernel.h> 43 44 44 45 #ifndef VIRTIO_PCI_NO_LEGACY 45 46 ··· 241 240 #define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 242 241 #define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 243 242 243 + /* Device parts access commands. NOTE(review): opcodes 0xb and 0xc are not defined here - presumably used by other commands in the virtio spec; confirm. */ 244 + #define VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY 0x7 245 + #define VIRTIO_ADMIN_CMD_DEVICE_CAP_GET 0x8 246 + #define VIRTIO_ADMIN_CMD_DRIVER_CAP_SET 0x9 247 + #define VIRTIO_ADMIN_CMD_RESOURCE_OBJ_CREATE 0xa 248 + #define VIRTIO_ADMIN_CMD_RESOURCE_OBJ_DESTROY 0xd 249 + #define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_GET 0xe 250 + #define VIRTIO_ADMIN_CMD_DEV_PARTS_GET 0xf 251 + #define VIRTIO_ADMIN_CMD_DEV_PARTS_SET 0x10 252 + #define VIRTIO_ADMIN_CMD_DEV_MODE_SET 0x11 253 + 244 254 struct virtio_admin_cmd_hdr { 245 255 __le16 opcode; 246 256 /* ··· 296 284 297 285 struct virtio_admin_cmd_notify_info_result { 298 286 struct virtio_admin_cmd_notify_info_data entries[VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO]; 287 + }; 288 + /* Capability id of the device parts capability; its capability-specific data is struct virtio_dev_parts_cap. */ 289 + #define VIRTIO_DEV_PARTS_CAP 0x0000 290 + 291 + struct virtio_dev_parts_cap { 292 + __u8 get_parts_resource_objects_limit; 293 + __u8 set_parts_resource_objects_limit; 294 + }; 295 + /* Number of 64-bit words needed to hold one bit per capability id up to VIRTIO_DEV_PARTS_CAP. */ 296 + #define MAX_CAP_ID __KERNEL_DIV_ROUND_UP(VIRTIO_DEV_PARTS_CAP + 1, 64) 297 + 298 + struct virtio_admin_cmd_query_cap_id_result { 299 + __le64 supported_caps[MAX_CAP_ID]; 300 + }; 301 + 302 + struct virtio_admin_cmd_cap_get_data { 303 + __le16 id; 304 + __u8 reserved[6]; 305 + }; 306 + 307 + struct virtio_admin_cmd_cap_set_data { 308 + __le16 id; 309 + __u8 reserved[6]; 310 + __u8 cap_specific_data[]; 311 + }; 312 + /* Common header identifying a resource object by type and id; embedded in the create, metadata-get, parts-get and parts-set payloads below. */ 313 + struct virtio_admin_cmd_resource_obj_cmd_hdr { 314 + __le16 type; 315 + __u8 reserved[2]; 316 + __le32 id; /* Indicates unique resource object id per resource object type */ 317 + }; 318 + 319 + struct virtio_admin_cmd_resource_obj_create_data { 320 + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; 321 + __le64 flags; 322 + __u8 
resource_obj_specific_data[]; 323 + }; 324 + /* Resource object type for device parts, followed by the operation types a device parts object can be created for. */ 325 + #define VIRTIO_RESOURCE_OBJ_DEV_PARTS 0 326 + 327 + #define VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET 0 328 + #define VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET 1 329 + 330 + struct virtio_resource_obj_dev_parts { 331 + __u8 type; 332 + __u8 reserved[7]; 333 + }; 334 + /* Metadata types selectable in struct virtio_admin_cmd_dev_parts_metadata_data. */ 335 + #define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE 0 336 + #define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_COUNT 1 337 + #define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_LIST 2 338 + 339 + struct virtio_admin_cmd_dev_parts_metadata_data { 340 + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; 341 + __u8 type; 342 + __u8 reserved[7]; 343 + }; 344 + 345 + #define VIRTIO_DEV_PART_F_OPTIONAL 0 346 + 347 + struct virtio_dev_part_hdr { 348 + __le16 part_type; 349 + __u8 flags; 350 + __u8 reserved; 351 + union { 352 + struct { 353 + __le32 offset; 354 + __le32 reserved; 355 + } pci_common_cfg; 356 + struct { 357 + __le16 index; 358 + __u8 reserved[6]; 359 + } vq_index; 360 + } selector; 361 + __le32 length; 362 + }; 363 + 364 + struct virtio_dev_part { 365 + struct virtio_dev_part_hdr hdr; 366 + __u8 value[]; 367 + }; 368 + /* Result of the metadata-get command; presumably the valid union member is selected by the requested metadata type - TODO confirm against the virtio spec. */ 369 + struct virtio_admin_cmd_dev_parts_metadata_result { 370 + union { 371 + struct { 372 + __le32 size; 373 + __le32 reserved; 374 + } parts_size; 375 + struct { 376 + __le32 count; 377 + __le32 reserved; 378 + } hdr_list_count; 379 + struct { 380 + __le32 count; 381 + __le32 reserved; 382 + struct virtio_dev_part_hdr hdrs[]; 383 + } hdr_list; 384 + }; 385 + }; 386 + 387 + #define VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_SELECTED 0 388 + #define VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL 1 389 + 390 + struct virtio_admin_cmd_dev_parts_get_data { 391 + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; 392 + __u8 type; 393 + __u8 reserved[7]; 394 + struct virtio_dev_part_hdr hdr_list[]; 395 + }; 396 + 397 + struct virtio_admin_cmd_dev_parts_set_data { 398 + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; 399 + struct virtio_dev_part 
parts[]; 400 + }; 401 + /* Bit number within the mode flags; the vfio caller passes BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED). */ 402 + #define VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED 0 403 + 404 + struct virtio_admin_cmd_dev_mode_set_data { 405 + __u8 flags; 299 406 }; 300 407 301 408 #endif
+1 -1
virt/kvm/vfio.c
··· 347 347 348 348 static int kvm_vfio_create(struct kvm_device *dev, u32 type); 349 349 350 - static struct kvm_device_ops kvm_vfio_ops = { 350 + static const struct kvm_device_ops kvm_vfio_ops = { 351 351 .name = "kvm-vfio", 352 352 .create = kvm_vfio_create, 353 353 .release = kvm_vfio_release,