Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfio-v6.16-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

- Remove an outdated DMA unmap optimization that relies on a feature
only implemented in AMDv1 page tables. (Jason Gunthorpe)

- Fix various migration issues in the hisi_acc_vfio_pci variant driver,
including use of a wrong DMA address requiring an update to the
migration data structure, resending task completion interrupt after
migration to re-sync queues, fixing a write-back cache sequencing
issue, fixing a driver unload issue, behaving correctly when the
guest driver is not loaded, and no longer squashing errors returned
from sub-functions. (Longfang Liu)

- mlx5-vfio-pci variant driver update to make use of the new two-step
DMA API for migration, using a page array directly rather than using
a page list mapped across a scatter list. (Leon Romanovsky)

- Fix an incorrect loop index used when unwinding allocation of dirty
page bitmaps on error, resulting in temporary failure in freeing
unused bitmaps. (Li RongQing)

* tag 'vfio-v6.16-rc1' of https://github.com/awilliam/linux-vfio:
vfio/type1: Fix error unwind in migration dirty bitmap allocation
vfio/mlx5: Enable the DMA link API
vfio/mlx5: Rewrite create mkey flow to allow better code reuse
vfio/mlx5: Explicitly use number of pages instead of allocated length
hisi_acc_vfio_pci: update function return values.
hisi_acc_vfio_pci: bugfix live migration function without VF device driver
hisi_acc_vfio_pci: bugfix the problem of uninstalling driver
hisi_acc_vfio_pci: bugfix cache write-back issue
hisi_acc_vfio_pci: add eq and aeq interruption restore
hisi_acc_vfio_pci: fix XQE dma address error
vfio/type1: Remove Fine Grained Superpages detection

+339 -342
+89 -32
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
··· 190 190 int ret; 191 191 192 192 /* Check VF state */ 193 - if (unlikely(hisi_qm_wait_mb_ready(qm))) { 193 + ret = hisi_qm_wait_mb_ready(qm); 194 + if (unlikely(ret)) { 194 195 dev_err(&qm->pdev->dev, "QM device is not ready to write\n"); 195 - return -EBUSY; 196 + return ret; 196 197 } 197 198 198 199 ret = qm_write_regs(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1); ··· 326 325 static int vf_qm_cache_wb(struct hisi_qm *qm) 327 326 { 328 327 unsigned int val; 328 + int ret; 329 329 330 330 writel(0x1, qm->io_base + QM_CACHE_WB_START); 331 - if (readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE, 331 + ret = readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE, 332 332 val, val & BIT(0), MB_POLL_PERIOD_US, 333 - MB_POLL_TIMEOUT_US)) { 333 + MB_POLL_TIMEOUT_US); 334 + if (ret) { 334 335 dev_err(&qm->pdev->dev, "vf QM writeback sqc cache fail\n"); 335 - return -EINVAL; 336 + return ret; 336 337 } 337 338 338 339 return 0; ··· 353 350 return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0); 354 351 } 355 352 353 + static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device *dev) 354 + { 355 + switch (vf_data->acc_magic) { 356 + case ACC_DEV_MAGIC_V2: 357 + if (vf_data->major_ver != ACC_DRV_MAJOR_VER) { 358 + dev_info(dev, "migration driver version<%u.%u> not match!\n", 359 + vf_data->major_ver, vf_data->minor_ver); 360 + return -EINVAL; 361 + } 362 + break; 363 + case ACC_DEV_MAGIC_V1: 364 + /* Correct dma address */ 365 + vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH]; 366 + vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET; 367 + vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW]; 368 + vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH]; 369 + vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET; 370 + vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW]; 371 + break; 372 + default: 373 + return -EINVAL; 374 + } 375 + 376 + return 0; 377 + } 378 + 356 379 static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev, 
357 380 struct hisi_acc_vf_migration_file *migf) 358 381 { ··· 392 363 if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done) 393 364 return 0; 394 365 395 - if (vf_data->acc_magic != ACC_DEV_MAGIC) { 366 + ret = vf_qm_version_check(vf_data, dev); 367 + if (ret) { 396 368 dev_err(dev, "failed to match ACC_DEV_MAGIC\n"); 397 - return -EINVAL; 369 + return ret; 398 370 } 399 371 400 372 if (vf_data->dev_id != hisi_acc_vdev->vf_dev->device) { ··· 407 377 ret = qm_get_vft(vf_qm, &vf_qm->qp_base); 408 378 if (ret <= 0) { 409 379 dev_err(dev, "failed to get vft qp nums\n"); 410 - return -EINVAL; 380 + return ret; 411 381 } 412 382 413 383 if (ret != vf_data->qp_num) { ··· 429 399 return -EINVAL; 430 400 } 431 401 432 - ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state, 1); 433 - if (ret) { 434 - dev_err(dev, "failed to write QM_VF_STATE\n"); 435 - return ret; 436 - } 437 - 438 - hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; 439 402 hisi_acc_vdev->match_done = true; 440 403 return 0; 441 404 } ··· 441 418 int vf_id = hisi_acc_vdev->vf_id; 442 419 int ret; 443 420 444 - vf_data->acc_magic = ACC_DEV_MAGIC; 421 + vf_data->acc_magic = ACC_DEV_MAGIC_V2; 422 + vf_data->major_ver = ACC_DRV_MAJOR_VER; 423 + vf_data->minor_ver = ACC_DRV_MINOR_VER; 445 424 /* Save device id */ 446 425 vf_data->dev_id = hisi_acc_vdev->vf_dev->device; 447 426 ··· 466 441 return 0; 467 442 } 468 443 444 + static void vf_qm_xeqc_save(struct hisi_qm *qm, 445 + struct hisi_acc_vf_migration_file *migf) 446 + { 447 + struct acc_vf_data *vf_data = &migf->vf_data; 448 + u16 eq_head, aeq_head; 449 + 450 + eq_head = vf_data->qm_eqc_dw[0] & 0xFFFF; 451 + qm_db(qm, 0, QM_DOORBELL_CMD_EQ, eq_head, 0); 452 + 453 + aeq_head = vf_data->qm_aeqc_dw[0] & 0xFFFF; 454 + qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, aeq_head, 0); 455 + } 456 + 469 457 static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev, 470 458 struct hisi_acc_vf_migration_file *migf) 471 459 { ··· 493 455 494 456 
if (migf->total_length < sizeof(struct acc_vf_data)) 495 457 return -EINVAL; 458 + 459 + if (!vf_data->eqe_dma || !vf_data->aeqe_dma || 460 + !vf_data->sqc_dma || !vf_data->cqc_dma) { 461 + dev_info(dev, "resume dma addr is NULL!\n"); 462 + hisi_acc_vdev->vf_qm_state = QM_NOT_READY; 463 + return 0; 464 + } 465 + 466 + ret = qm_write_regs(qm, QM_VF_STATE, &vf_data->vf_qm_state, 1); 467 + if (ret) { 468 + dev_err(dev, "failed to write QM_VF_STATE\n"); 469 + return ret; 470 + } 471 + hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; 496 472 497 473 qm->eqe_dma = vf_data->eqe_dma; 498 474 qm->aeqe_dma = vf_data->aeqe_dma; ··· 545 493 546 494 ret = qm_get_regs(vf_qm, vf_data); 547 495 if (ret) 548 - return -EINVAL; 496 + return ret; 549 497 550 498 /* Every reg is 32 bit, the dma address is 64 bit. */ 551 - vf_data->eqe_dma = vf_data->qm_eqc_dw[1]; 499 + vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH]; 552 500 vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET; 553 - vf_data->eqe_dma |= vf_data->qm_eqc_dw[0]; 554 - vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1]; 501 + vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW]; 502 + vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH]; 555 503 vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET; 556 - vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0]; 504 + vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW]; 557 505 558 506 /* Through SQC_BT/CQC_BT to get sqc and cqc address */ 559 507 ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma); 560 508 if (ret) { 561 509 dev_err(dev, "failed to read SQC addr!\n"); 562 - return -EINVAL; 510 + return ret; 563 511 } 564 512 565 513 ret = qm_get_cqc(vf_qm, &vf_data->cqc_dma); 566 514 if (ret) { 567 515 dev_err(dev, "failed to read CQC addr!\n"); 568 - return -EINVAL; 516 + return ret; 569 517 } 570 518 571 519 return 0; ··· 576 524 { 577 525 struct acc_vf_data *vf_data = &migf->vf_data; 578 526 struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; 579 - struct device *dev = &vf_qm->pdev->dev; 580 527 int ret; 
581 528 582 529 if (unlikely(qm_wait_dev_not_ready(vf_qm))) { ··· 589 538 vf_data->vf_qm_state = QM_READY; 590 539 hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; 591 540 592 - ret = vf_qm_cache_wb(vf_qm); 593 - if (ret) { 594 - dev_err(dev, "failed to writeback QM Cache!\n"); 595 - return ret; 596 - } 597 - 598 541 ret = vf_qm_read_data(vf_qm, vf_data); 599 542 if (ret) 600 - return -EINVAL; 543 + return ret; 601 544 602 545 migf->total_length = sizeof(struct acc_vf_data); 546 + /* Save eqc and aeqc interrupt information */ 547 + vf_qm_xeqc_save(vf_qm, migf); 548 + 603 549 return 0; 604 550 } 605 551 ··· 1015 967 dev_err(dev, "failed to check QM INT state!\n"); 1016 968 return ret; 1017 969 } 970 + 971 + ret = vf_qm_cache_wb(vf_qm); 972 + if (ret) { 973 + dev_err(dev, "failed to writeback QM cache!\n"); 974 + return ret; 975 + } 976 + 1018 977 return 0; 1019 978 } 1020 979 ··· 1382 1327 ret = qm_wait_dev_not_ready(vf_qm); 1383 1328 if (ret) { 1384 1329 seq_puts(seq, "VF device not ready!\n"); 1385 - return -EBUSY; 1330 + return ret; 1386 1331 } 1387 1332 1388 1333 return 0; ··· 1518 1463 struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev); 1519 1464 struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; 1520 1465 1466 + hisi_acc_vf_disable_fds(hisi_acc_vdev); 1521 1467 mutex_lock(&hisi_acc_vdev->open_mutex); 1522 1468 hisi_acc_vdev->dev_opened = false; 1523 1469 iounmap(vf_qm->io_base); ··· 1541 1485 hisi_acc_vdev->vf_id = pci_iov_vf_id(pdev) + 1; 1542 1486 hisi_acc_vdev->pf_qm = pf_qm; 1543 1487 hisi_acc_vdev->vf_dev = pdev; 1488 + hisi_acc_vdev->vf_qm_state = QM_NOT_READY; 1544 1489 mutex_init(&hisi_acc_vdev->state_mutex); 1545 1490 mutex_init(&hisi_acc_vdev->open_mutex); 1546 1491
+12 -2
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
··· 39 39 #define QM_REG_ADDR_OFFSET 0x0004 40 40 41 41 #define QM_XQC_ADDR_OFFSET 32U 42 + #define QM_XQC_ADDR_LOW 0x1 43 + #define QM_XQC_ADDR_HIGH 0x2 44 + 42 45 #define QM_VF_AEQ_INT_MASK 0x0004 43 46 #define QM_VF_EQ_INT_MASK 0x000c 44 47 #define QM_IFC_INT_SOURCE_V 0x0020 ··· 53 50 #define QM_EQC_DW0 0X8000 54 51 #define QM_AEQC_DW0 0X8020 55 52 53 + #define ACC_DRV_MAJOR_VER 1 54 + #define ACC_DRV_MINOR_VER 0 55 + 56 + #define ACC_DEV_MAGIC_V1 0XCDCDCDCDFEEDAACC 57 + #define ACC_DEV_MAGIC_V2 0xAACCFEEDDECADEDE 58 + 56 59 struct acc_vf_data { 57 60 #define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state) 58 61 /* QM match information */ 59 - #define ACC_DEV_MAGIC 0XCDCDCDCDFEEDAACC 60 62 u64 acc_magic; 61 63 u32 qp_num; 62 64 u32 dev_id; ··· 69 61 u32 qp_base; 70 62 u32 vf_qm_state; 71 63 /* QM reserved match information */ 72 - u32 qm_rsv_state[3]; 64 + u16 major_ver; 65 + u16 minor_ver; 66 + u32 qm_rsv_state[2]; 73 67 74 68 /* QM RW regs */ 75 69 u32 aeq_int_mask;
+179 -194
drivers/vfio/pci/mlx5/cmd.c
··· 313 313 return ret; 314 314 } 315 315 316 - static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn, 317 - struct mlx5_vhca_data_buffer *buf, 318 - struct mlx5_vhca_recv_buf *recv_buf, 319 - u32 *mkey) 316 + static u32 *alloc_mkey_in(u32 npages, u32 pdn) 320 317 { 321 - size_t npages = buf ? DIV_ROUND_UP(buf->allocated_length, PAGE_SIZE) : 322 - recv_buf->npages; 323 - int err = 0, inlen; 324 - __be64 *mtt; 318 + int inlen; 325 319 void *mkc; 326 320 u32 *in; 327 321 328 322 inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + 329 - sizeof(*mtt) * round_up(npages, 2); 323 + sizeof(__be64) * round_up(npages, 2); 330 324 331 - in = kvzalloc(inlen, GFP_KERNEL); 325 + in = kvzalloc(inlen, GFP_KERNEL_ACCOUNT); 332 326 if (!in) 333 - return -ENOMEM; 327 + return NULL; 334 328 335 329 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 336 330 DIV_ROUND_UP(npages, 2)); 337 - mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 338 - 339 - if (buf) { 340 - struct sg_dma_page_iter dma_iter; 341 - 342 - for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0) 343 - *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter)); 344 - } else { 345 - int i; 346 - 347 - for (i = 0; i < npages; i++) 348 - *mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]); 349 - } 350 331 351 332 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 352 333 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); ··· 341 360 MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); 342 361 MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2)); 343 362 MLX5_SET64(mkc, mkc, len, npages * PAGE_SIZE); 344 - err = mlx5_core_create_mkey(mdev, mkey, in, inlen); 345 - kvfree(in); 363 + 364 + return in; 365 + } 366 + 367 + static int create_mkey(struct mlx5_core_dev *mdev, u32 npages, u32 *mkey_in, 368 + u32 *mkey) 369 + { 370 + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + 371 + sizeof(__be64) * round_up(npages, 2); 372 + 373 + return mlx5_core_create_mkey(mdev, mkey, 
mkey_in, inlen); 374 + } 375 + 376 + static void unregister_dma_pages(struct mlx5_core_dev *mdev, u32 npages, 377 + u32 *mkey_in, struct dma_iova_state *state, 378 + enum dma_data_direction dir) 379 + { 380 + dma_addr_t addr; 381 + __be64 *mtt; 382 + int i; 383 + 384 + if (dma_use_iova(state)) { 385 + dma_iova_destroy(mdev->device, state, npages * PAGE_SIZE, dir, 386 + 0); 387 + } else { 388 + mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, 389 + klm_pas_mtt); 390 + for (i = npages - 1; i >= 0; i--) { 391 + addr = be64_to_cpu(mtt[i]); 392 + dma_unmap_page(mdev->device, addr, PAGE_SIZE, dir); 393 + } 394 + } 395 + } 396 + 397 + static int register_dma_pages(struct mlx5_core_dev *mdev, u32 npages, 398 + struct page **page_list, u32 *mkey_in, 399 + struct dma_iova_state *state, 400 + enum dma_data_direction dir) 401 + { 402 + dma_addr_t addr; 403 + size_t mapped = 0; 404 + __be64 *mtt; 405 + int i, err; 406 + 407 + mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt); 408 + 409 + if (dma_iova_try_alloc(mdev->device, state, 0, npages * PAGE_SIZE)) { 410 + addr = state->addr; 411 + for (i = 0; i < npages; i++) { 412 + err = dma_iova_link(mdev->device, state, 413 + page_to_phys(page_list[i]), mapped, 414 + PAGE_SIZE, dir, 0); 415 + if (err) 416 + goto error; 417 + *mtt++ = cpu_to_be64(addr); 418 + addr += PAGE_SIZE; 419 + mapped += PAGE_SIZE; 420 + } 421 + err = dma_iova_sync(mdev->device, state, 0, mapped); 422 + if (err) 423 + goto error; 424 + } else { 425 + for (i = 0; i < npages; i++) { 426 + addr = dma_map_page(mdev->device, page_list[i], 0, 427 + PAGE_SIZE, dir); 428 + err = dma_mapping_error(mdev->device, addr); 429 + if (err) 430 + goto error; 431 + *mtt++ = cpu_to_be64(addr); 432 + } 433 + } 434 + return 0; 435 + 436 + error: 437 + unregister_dma_pages(mdev, i, mkey_in, state, dir); 346 438 return err; 347 439 } 348 440 ··· 429 375 if (mvdev->mdev_detach) 430 376 return -ENOTCONN; 431 377 432 - if (buf->dmaed || !buf->allocated_length) 378 + 
if (buf->mkey_in || !buf->npages) 433 379 return -EINVAL; 434 380 435 - ret = dma_map_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0); 436 - if (ret) 437 - return ret; 381 + buf->mkey_in = alloc_mkey_in(buf->npages, buf->migf->pdn); 382 + if (!buf->mkey_in) 383 + return -ENOMEM; 438 384 439 - ret = _create_mkey(mdev, buf->migf->pdn, buf, NULL, &buf->mkey); 385 + ret = register_dma_pages(mdev, buf->npages, buf->page_list, 386 + buf->mkey_in, &buf->state, buf->dma_dir); 440 387 if (ret) 441 - goto err; 388 + goto err_register_dma; 442 389 443 - buf->dmaed = true; 390 + ret = create_mkey(mdev, buf->npages, buf->mkey_in, &buf->mkey); 391 + if (ret) 392 + goto err_create_mkey; 444 393 445 394 return 0; 446 - err: 447 - dma_unmap_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0); 395 + 396 + err_create_mkey: 397 + unregister_dma_pages(mdev, buf->npages, buf->mkey_in, &buf->state, 398 + buf->dma_dir); 399 + err_register_dma: 400 + kvfree(buf->mkey_in); 401 + buf->mkey_in = NULL; 448 402 return ret; 403 + } 404 + 405 + static void free_page_list(u32 npages, struct page **page_list) 406 + { 407 + int i; 408 + 409 + /* Undo alloc_pages_bulk() */ 410 + for (i = npages - 1; i >= 0; i--) 411 + __free_page(page_list[i]); 412 + 413 + kvfree(page_list); 449 414 } 450 415 451 416 void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf) 452 417 { 453 - struct mlx5_vf_migration_file *migf = buf->migf; 454 - struct sg_page_iter sg_iter; 418 + struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev; 419 + struct mlx5_core_dev *mdev = mvdev->mdev; 455 420 456 - lockdep_assert_held(&migf->mvdev->state_mutex); 457 - WARN_ON(migf->mvdev->mdev_detach); 421 + lockdep_assert_held(&mvdev->state_mutex); 422 + WARN_ON(mvdev->mdev_detach); 458 423 459 - if (buf->dmaed) { 460 - mlx5_core_destroy_mkey(migf->mvdev->mdev, buf->mkey); 461 - dma_unmap_sgtable(migf->mvdev->mdev->device, &buf->table.sgt, 462 - buf->dma_dir, 0); 424 + if (buf->mkey_in) { 425 + 
mlx5_core_destroy_mkey(mdev, buf->mkey); 426 + unregister_dma_pages(mdev, buf->npages, buf->mkey_in, 427 + &buf->state, buf->dma_dir); 428 + kvfree(buf->mkey_in); 463 429 } 464 430 465 - /* Undo alloc_pages_bulk() */ 466 - for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0) 467 - __free_page(sg_page_iter_page(&sg_iter)); 468 - sg_free_append_table(&buf->table); 431 + free_page_list(buf->npages, buf->page_list); 469 432 kfree(buf); 470 433 } 471 434 472 - static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, 473 - unsigned int npages) 435 + static int mlx5vf_add_pages(struct page ***page_list, unsigned int npages) 474 436 { 475 - unsigned int to_alloc = npages; 476 - struct page **page_list; 477 - unsigned long filled; 478 - unsigned int to_fill; 479 - int ret; 437 + unsigned int filled, done = 0; 480 438 int i; 481 439 482 - to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); 483 - page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); 484 - if (!page_list) 440 + *page_list = 441 + kvcalloc(npages, sizeof(struct page *), GFP_KERNEL_ACCOUNT); 442 + if (!*page_list) 485 443 return -ENOMEM; 486 444 487 - do { 488 - filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill, 489 - page_list); 490 - if (!filled) { 491 - ret = -ENOMEM; 445 + for (;;) { 446 + filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, npages - done, 447 + *page_list + done); 448 + if (!filled) 492 449 goto err; 493 - } 494 - to_alloc -= filled; 495 - ret = sg_alloc_append_table_from_pages( 496 - &buf->table, page_list, filled, 0, 497 - filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, 498 - GFP_KERNEL_ACCOUNT); 499 450 500 - if (ret) 501 - goto err_append; 502 - buf->allocated_length += filled * PAGE_SIZE; 503 - /* clean input for another bulk allocation */ 504 - memset(page_list, 0, filled * sizeof(*page_list)); 505 - to_fill = min_t(unsigned int, to_alloc, 506 - PAGE_SIZE / sizeof(*page_list)); 507 - } while (to_alloc > 0); 451 + done += filled; 
452 + if (done == npages) 453 + break; 454 + } 508 455 509 - kvfree(page_list); 510 456 return 0; 511 457 512 - err_append: 513 - for (i = filled - 1; i >= 0; i--) 514 - __free_page(page_list[i]); 515 458 err: 516 - kvfree(page_list); 517 - return ret; 459 + for (i = 0; i < done; i++) 460 + __free_page(*page_list[i]); 461 + 462 + kvfree(*page_list); 463 + *page_list = NULL; 464 + return -ENOMEM; 518 465 } 519 466 520 467 struct mlx5_vhca_data_buffer * 521 - mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, 522 - size_t length, 468 + mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages, 523 469 enum dma_data_direction dma_dir) 524 470 { 525 471 struct mlx5_vhca_data_buffer *buf; ··· 531 477 532 478 buf->dma_dir = dma_dir; 533 479 buf->migf = migf; 534 - if (length) { 535 - ret = mlx5vf_add_migration_pages(buf, 536 - DIV_ROUND_UP_ULL(length, PAGE_SIZE)); 480 + if (npages) { 481 + ret = mlx5vf_add_pages(&buf->page_list, npages); 537 482 if (ret) 538 483 goto end; 484 + 485 + buf->npages = npages; 539 486 540 487 if (dma_dir != DMA_NONE) { 541 488 ret = mlx5vf_dma_data_buffer(buf); ··· 560 505 } 561 506 562 507 struct mlx5_vhca_data_buffer * 563 - mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, 564 - size_t length, enum dma_data_direction dma_dir) 508 + mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages, 509 + enum dma_data_direction dma_dir) 565 510 { 566 511 struct mlx5_vhca_data_buffer *buf, *temp_buf; 567 512 struct list_head free_list; ··· 576 521 list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) { 577 522 if (buf->dma_dir == dma_dir) { 578 523 list_del_init(&buf->buf_elm); 579 - if (buf->allocated_length >= length) { 524 + if (buf->npages >= npages) { 580 525 spin_unlock_irq(&migf->list_lock); 581 526 goto found; 582 527 } ··· 590 535 } 591 536 } 592 537 spin_unlock_irq(&migf->list_lock); 593 - buf = mlx5vf_alloc_data_buffer(migf, length, dma_dir); 538 + buf = 
mlx5vf_alloc_data_buffer(migf, npages, dma_dir); 594 539 595 540 found: 596 541 while ((temp_buf = list_first_entry_or_null(&free_list, ··· 771 716 MLX5_SET(save_vhca_state_in, in, op_mod, 0); 772 717 MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id); 773 718 MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey); 774 - MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length); 719 + MLX5_SET(save_vhca_state_in, in, size, buf->npages * PAGE_SIZE); 775 720 MLX5_SET(save_vhca_state_in, in, incremental, inc); 776 721 MLX5_SET(save_vhca_state_in, in, set_track, track); 777 722 ··· 793 738 } 794 739 795 740 if (!header_buf) { 796 - header_buf = mlx5vf_get_data_buffer(migf, 797 - sizeof(struct mlx5_vf_migration_header), DMA_NONE); 741 + header_buf = mlx5vf_get_data_buffer( 742 + migf, 743 + DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header), 744 + PAGE_SIZE), 745 + DMA_NONE); 798 746 if (IS_ERR(header_buf)) { 799 747 err = PTR_ERR(header_buf); 800 748 goto err_free; ··· 841 783 if (mvdev->mdev_detach) 842 784 return -ENOTCONN; 843 785 844 - if (!buf->dmaed) { 786 + if (!buf->mkey_in) { 845 787 err = mlx5vf_dma_data_buffer(buf); 846 788 if (err) 847 789 return err; ··· 1396 1338 kfree(qp); 1397 1339 } 1398 1340 1399 - static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf) 1400 - { 1401 - int i; 1402 - 1403 - /* Undo alloc_pages_bulk() */ 1404 - for (i = 0; i < recv_buf->npages; i++) 1405 - __free_page(recv_buf->page_list[i]); 1406 - 1407 - kvfree(recv_buf->page_list); 1408 - } 1409 - 1410 - static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf, 1411 - unsigned int npages) 1412 - { 1413 - unsigned int filled = 0, done = 0; 1414 - int i; 1415 - 1416 - recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list), 1417 - GFP_KERNEL_ACCOUNT); 1418 - if (!recv_buf->page_list) 1419 - return -ENOMEM; 1420 - 1421 - for (;;) { 1422 - filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, 1423 - npages - done, 1424 - recv_buf->page_list + done); 1425 - if 
(!filled) 1426 - goto err; 1427 - 1428 - done += filled; 1429 - if (done == npages) 1430 - break; 1431 - } 1432 - 1433 - recv_buf->npages = npages; 1434 - return 0; 1435 - 1436 - err: 1437 - for (i = 0; i < npages; i++) { 1438 - if (recv_buf->page_list[i]) 1439 - __free_page(recv_buf->page_list[i]); 1440 - } 1441 - 1442 - kvfree(recv_buf->page_list); 1443 - return -ENOMEM; 1444 - } 1445 - 1446 - static int register_dma_recv_pages(struct mlx5_core_dev *mdev, 1447 - struct mlx5_vhca_recv_buf *recv_buf) 1448 - { 1449 - int i, j; 1450 - 1451 - recv_buf->dma_addrs = kvcalloc(recv_buf->npages, 1452 - sizeof(*recv_buf->dma_addrs), 1453 - GFP_KERNEL_ACCOUNT); 1454 - if (!recv_buf->dma_addrs) 1455 - return -ENOMEM; 1456 - 1457 - for (i = 0; i < recv_buf->npages; i++) { 1458 - recv_buf->dma_addrs[i] = dma_map_page(mdev->device, 1459 - recv_buf->page_list[i], 1460 - 0, PAGE_SIZE, 1461 - DMA_FROM_DEVICE); 1462 - if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i])) 1463 - goto error; 1464 - } 1465 - return 0; 1466 - 1467 - error: 1468 - for (j = 0; j < i; j++) 1469 - dma_unmap_single(mdev->device, recv_buf->dma_addrs[j], 1470 - PAGE_SIZE, DMA_FROM_DEVICE); 1471 - 1472 - kvfree(recv_buf->dma_addrs); 1473 - return -ENOMEM; 1474 - } 1475 - 1476 - static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev, 1477 - struct mlx5_vhca_recv_buf *recv_buf) 1478 - { 1479 - int i; 1480 - 1481 - for (i = 0; i < recv_buf->npages; i++) 1482 - dma_unmap_single(mdev->device, recv_buf->dma_addrs[i], 1483 - PAGE_SIZE, DMA_FROM_DEVICE); 1484 - 1485 - kvfree(recv_buf->dma_addrs); 1486 - } 1487 - 1488 1341 static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev, 1489 1342 struct mlx5_vhca_qp *qp) 1490 1343 { 1491 1344 struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf; 1492 1345 1493 1346 mlx5_core_destroy_mkey(mdev, recv_buf->mkey); 1494 - unregister_dma_recv_pages(mdev, recv_buf); 1495 - free_recv_pages(&qp->recv_buf); 1347 + unregister_dma_pages(mdev, recv_buf->npages, 
recv_buf->mkey_in, 1348 + &recv_buf->state, DMA_FROM_DEVICE); 1349 + kvfree(recv_buf->mkey_in); 1350 + free_page_list(recv_buf->npages, recv_buf->page_list); 1496 1351 } 1497 1352 1498 1353 static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev, ··· 1416 1445 struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf; 1417 1446 int err; 1418 1447 1419 - err = alloc_recv_pages(recv_buf, npages); 1420 - if (err < 0) 1448 + err = mlx5vf_add_pages(&recv_buf->page_list, npages); 1449 + if (err) 1421 1450 return err; 1422 1451 1423 - err = register_dma_recv_pages(mdev, recv_buf); 1424 - if (err) 1425 - goto end; 1452 + recv_buf->npages = npages; 1426 1453 1427 - err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey); 1454 + recv_buf->mkey_in = alloc_mkey_in(npages, pdn); 1455 + if (!recv_buf->mkey_in) { 1456 + err = -ENOMEM; 1457 + goto end; 1458 + } 1459 + 1460 + err = register_dma_pages(mdev, npages, recv_buf->page_list, 1461 + recv_buf->mkey_in, &recv_buf->state, 1462 + DMA_FROM_DEVICE); 1463 + if (err) 1464 + goto err_register_dma; 1465 + 1466 + err = create_mkey(mdev, npages, recv_buf->mkey_in, &recv_buf->mkey); 1428 1467 if (err) 1429 1468 goto err_create_mkey; 1430 1469 1431 1470 return 0; 1432 1471 1433 1472 err_create_mkey: 1434 - unregister_dma_recv_pages(mdev, recv_buf); 1473 + unregister_dma_pages(mdev, npages, recv_buf->mkey_in, &recv_buf->state, 1474 + DMA_FROM_DEVICE); 1475 + err_register_dma: 1476 + kvfree(recv_buf->mkey_in); 1477 + recv_buf->mkey_in = NULL; 1435 1478 end: 1436 - free_recv_pages(recv_buf); 1479 + free_page_list(npages, recv_buf->page_list); 1437 1480 return err; 1438 1481 } 1439 1482
+21 -14
drivers/vfio/pci/mlx5/cmd.h
··· 53 53 }; 54 54 55 55 struct mlx5_vhca_data_buffer { 56 - struct sg_append_table table; 56 + struct page **page_list; 57 + struct dma_iova_state state; 57 58 loff_t start_pos; 58 59 u64 length; 59 - u64 allocated_length; 60 + u32 npages; 60 61 u32 mkey; 62 + u32 *mkey_in; 61 63 enum dma_data_direction dma_dir; 62 - u8 dmaed:1; 63 64 u8 stop_copy_chunk_num; 64 65 struct list_head buf_elm; 65 66 struct mlx5_vf_migration_file *migf; 66 - /* Optimize mlx5vf_get_migration_page() for sequential access */ 67 - struct scatterlist *last_offset_sg; 68 - unsigned int sg_last_entry; 69 - unsigned long last_offset; 70 67 }; 71 68 72 69 struct mlx5vf_async_data { ··· 130 133 struct mlx5_vhca_recv_buf { 131 134 u32 npages; 132 135 struct page **page_list; 133 - dma_addr_t *dma_addrs; 136 + struct dma_iova_state state; 134 137 u32 next_rq_offset; 138 + u32 *mkey_in; 135 139 u32 mkey; 136 140 }; 137 141 ··· 215 217 void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf); 216 218 void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf); 217 219 struct mlx5_vhca_data_buffer * 218 - mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, 219 - size_t length, enum dma_data_direction dma_dir); 220 + mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages, 221 + enum dma_data_direction dma_dir); 220 222 void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf); 221 223 struct mlx5_vhca_data_buffer * 222 - mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, 223 - size_t length, enum dma_data_direction dma_dir); 224 + mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages, 225 + enum dma_data_direction dma_dir); 224 226 void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf); 225 - struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, 226 - unsigned long offset); 227 + static inline struct page * 228 + mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, 229 + unsigned long 
offset) 230 + { 231 + int page_entry = offset / PAGE_SIZE; 232 + 233 + if (page_entry >= buf->npages) 234 + return NULL; 235 + 236 + return buf->page_list[page_entry]; 237 + } 227 238 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); 228 239 void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev, 229 240 enum mlx5_vf_migf_state *last_save_state);
+36 -51
drivers/vfio/pci/mlx5/main.c
··· 34 34 core_device); 35 35 } 36 36 37 - struct page * 38 - mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, 39 - unsigned long offset) 40 - { 41 - unsigned long cur_offset = 0; 42 - struct scatterlist *sg; 43 - unsigned int i; 44 - 45 - /* All accesses are sequential */ 46 - if (offset < buf->last_offset || !buf->last_offset_sg) { 47 - buf->last_offset = 0; 48 - buf->last_offset_sg = buf->table.sgt.sgl; 49 - buf->sg_last_entry = 0; 50 - } 51 - 52 - cur_offset = buf->last_offset; 53 - 54 - for_each_sg(buf->last_offset_sg, sg, 55 - buf->table.sgt.orig_nents - buf->sg_last_entry, i) { 56 - if (offset < sg->length + cur_offset) { 57 - buf->last_offset_sg = sg; 58 - buf->sg_last_entry += i; 59 - buf->last_offset = cur_offset; 60 - return nth_page(sg_page(sg), 61 - (offset - cur_offset) / PAGE_SIZE); 62 - } 63 - cur_offset += sg->length; 64 - } 65 - return NULL; 66 - } 67 - 68 37 static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf) 69 38 { 70 39 mutex_lock(&migf->lock); ··· 277 308 mlx5vf_mig_file_get_stop_copy_buf(struct mlx5_vf_migration_file *migf, 278 309 u8 index, size_t required_length) 279 310 { 311 + u32 npages = DIV_ROUND_UP(required_length, PAGE_SIZE); 280 312 struct mlx5_vhca_data_buffer *buf = migf->buf[index]; 281 313 u8 chunk_num; 282 314 ··· 285 315 chunk_num = buf->stop_copy_chunk_num; 286 316 buf->migf->buf[index] = NULL; 287 317 /* Checking whether the pre-allocated buffer can fit */ 288 - if (buf->allocated_length >= required_length) 318 + if (buf->npages >= npages) 289 319 return buf; 290 320 291 321 mlx5vf_put_data_buffer(buf); 292 - buf = mlx5vf_get_data_buffer(buf->migf, required_length, 293 - DMA_FROM_DEVICE); 322 + buf = mlx5vf_get_data_buffer(buf->migf, npages, DMA_FROM_DEVICE); 294 323 if (IS_ERR(buf)) 295 324 return buf; 296 325 ··· 342 373 u8 *to_buff; 343 374 int ret; 344 375 345 - header_buf = mlx5vf_get_data_buffer(migf, size, DMA_NONE); 376 + header_buf = mlx5vf_get_data_buffer(migf, DIV_ROUND_UP(size, 
PAGE_SIZE), 377 + DMA_NONE); 346 378 if (IS_ERR(header_buf)) 347 379 return PTR_ERR(header_buf); 348 380 ··· 358 388 to_buff = kmap_local_page(page); 359 389 memcpy(to_buff, &header, sizeof(header)); 360 390 header_buf->length = sizeof(header); 361 - data.stop_copy_size = cpu_to_le64(migf->buf[0]->allocated_length); 391 + data.stop_copy_size = cpu_to_le64(migf->buf[0]->npages * PAGE_SIZE); 362 392 memcpy(to_buff + sizeof(header), &data, sizeof(data)); 363 393 header_buf->length += sizeof(data); 364 394 kunmap_local(to_buff); ··· 407 437 408 438 num_chunks = mvdev->chunk_mode ? MAX_NUM_CHUNKS : 1; 409 439 for (i = 0; i < num_chunks; i++) { 410 - buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE); 440 + buf = mlx5vf_get_data_buffer( 441 + migf, DIV_ROUND_UP(inc_state_size, PAGE_SIZE), 442 + DMA_FROM_DEVICE); 411 443 if (IS_ERR(buf)) { 412 444 ret = PTR_ERR(buf); 413 445 goto err; 414 446 } 415 447 416 448 migf->buf[i] = buf; 417 - buf = mlx5vf_get_data_buffer(migf, 418 - sizeof(struct mlx5_vf_migration_header), DMA_NONE); 449 + buf = mlx5vf_get_data_buffer( 450 + migf, 451 + DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header), 452 + PAGE_SIZE), 453 + DMA_NONE); 419 454 if (IS_ERR(buf)) { 420 455 ret = PTR_ERR(buf); 421 456 goto err; ··· 528 553 * We finished transferring the current state and the device has a 529 554 * dirty state, save a new state to be ready for. 
530 555 */ 531 - buf = mlx5vf_get_data_buffer(migf, inc_length, DMA_FROM_DEVICE); 556 + buf = mlx5vf_get_data_buffer(migf, DIV_ROUND_UP(inc_length, PAGE_SIZE), 557 + DMA_FROM_DEVICE); 532 558 if (IS_ERR(buf)) { 533 559 ret = PTR_ERR(buf); 534 560 mlx5vf_mark_err(migf); ··· 651 675 652 676 if (track) { 653 677 /* leave the allocated buffer ready for the stop-copy phase */ 654 - buf = mlx5vf_alloc_data_buffer(migf, 655 - migf->buf[0]->allocated_length, DMA_FROM_DEVICE); 678 + buf = mlx5vf_alloc_data_buffer(migf, migf->buf[0]->npages, 679 + DMA_FROM_DEVICE); 656 680 if (IS_ERR(buf)) { 657 681 ret = PTR_ERR(buf); 658 682 goto out_pd; ··· 893 917 goto out_unlock; 894 918 break; 895 919 case MLX5_VF_LOAD_STATE_PREP_HEADER_DATA: 896 - if (vhca_buf_header->allocated_length < migf->record_size) { 920 + { 921 + u32 npages = DIV_ROUND_UP(migf->record_size, PAGE_SIZE); 922 + 923 + if (vhca_buf_header->npages < npages) { 897 924 mlx5vf_free_data_buffer(vhca_buf_header); 898 925 899 - migf->buf_header[0] = mlx5vf_alloc_data_buffer(migf, 900 - migf->record_size, DMA_NONE); 926 + migf->buf_header[0] = mlx5vf_alloc_data_buffer( 927 + migf, npages, DMA_NONE); 901 928 if (IS_ERR(migf->buf_header[0])) { 902 929 ret = PTR_ERR(migf->buf_header[0]); 903 930 migf->buf_header[0] = NULL; ··· 913 934 vhca_buf_header->start_pos = migf->max_pos; 914 935 migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER_DATA; 915 936 break; 937 + } 916 938 case MLX5_VF_LOAD_STATE_READ_HEADER_DATA: 917 939 ret = mlx5vf_resume_read_header_data(migf, vhca_buf_header, 918 940 &buf, &len, pos, &done); ··· 924 944 { 925 945 u64 size = max(migf->record_size, 926 946 migf->stop_copy_prep_size); 947 + u32 npages = DIV_ROUND_UP(size, PAGE_SIZE); 927 948 928 - if (vhca_buf->allocated_length < size) { 949 + if (vhca_buf->npages < npages) { 929 950 mlx5vf_free_data_buffer(vhca_buf); 930 951 931 - migf->buf[0] = mlx5vf_alloc_data_buffer(migf, 932 - size, DMA_TO_DEVICE); 952 + migf->buf[0] = mlx5vf_alloc_data_buffer( 953 + 
migf, npages, DMA_TO_DEVICE); 933 954 if (IS_ERR(migf->buf[0])) { 934 955 ret = PTR_ERR(migf->buf[0]); 935 956 migf->buf[0] = NULL; ··· 1018 1037 } 1019 1038 1020 1039 migf->buf[0] = buf; 1021 - buf = mlx5vf_alloc_data_buffer(migf, 1022 - sizeof(struct mlx5_vf_migration_header), DMA_NONE); 1040 + buf = mlx5vf_alloc_data_buffer( 1041 + migf, 1042 + DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header), 1043 + PAGE_SIZE), 1044 + DMA_NONE); 1023 1045 if (IS_ERR(buf)) { 1024 1046 ret = PTR_ERR(buf); 1025 1047 goto out_buf; ··· 1132 1148 MLX5VF_QUERY_INC | MLX5VF_QUERY_CLEANUP); 1133 1149 if (ret) 1134 1150 return ERR_PTR(ret); 1135 - buf = mlx5vf_get_data_buffer(migf, size, DMA_FROM_DEVICE); 1151 + buf = mlx5vf_get_data_buffer(migf, 1152 + DIV_ROUND_UP(size, PAGE_SIZE), DMA_FROM_DEVICE); 1136 1153 if (IS_ERR(buf)) 1137 1154 return ERR_CAST(buf); 1138 1155 /* pre_copy cleanup */
+2 -49
drivers/vfio/vfio_iommu_type1.c
··· 80 80 struct iommu_domain *domain; 81 81 struct list_head next; 82 82 struct list_head group_list; 83 - bool fgsp : 1; /* Fine-grained super pages */ 84 83 bool enforce_cache_coherency : 1; 85 84 }; 86 85 ··· 292 293 struct rb_node *p; 293 294 294 295 for (p = rb_prev(n); p; p = rb_prev(p)) { 295 - struct vfio_dma *dma = rb_entry(n, 296 + struct vfio_dma *dma = rb_entry(p, 296 297 struct vfio_dma, node); 297 298 298 299 vfio_dma_bitmap_free(dma); ··· 1094 1095 * may require hardware cache flushing, try to find the 1095 1096 * largest contiguous physical memory chunk to unmap. 1096 1097 */ 1097 - for (len = PAGE_SIZE; 1098 - !domain->fgsp && iova + len < end; len += PAGE_SIZE) { 1098 + for (len = PAGE_SIZE; iova + len < end; len += PAGE_SIZE) { 1099 1099 next = iommu_iova_to_phys(domain->domain, iova + len); 1100 1100 if (next != phys + len) 1101 1101 break; ··· 1831 1833 return ret; 1832 1834 } 1833 1835 1834 - /* 1835 - * We change our unmap behavior slightly depending on whether the IOMMU 1836 - * supports fine-grained superpages. IOMMUs like AMD-Vi will use a superpage 1837 - * for practically any contiguous power-of-two mapping we give it. This means 1838 - * we don't need to look for contiguous chunks ourselves to make unmapping 1839 - * more efficient. On IOMMUs with coarse-grained super pages, like Intel VT-d 1840 - * with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks 1841 - * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when 1842 - * hugetlbfs is in use. 
1843 - */ 1844 - static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head *regions) 1845 - { 1846 - int ret, order = get_order(PAGE_SIZE * 2); 1847 - struct vfio_iova *region; 1848 - struct page *pages; 1849 - dma_addr_t start; 1850 - 1851 - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); 1852 - if (!pages) 1853 - return; 1854 - 1855 - list_for_each_entry(region, regions, list) { 1856 - start = ALIGN(region->start, PAGE_SIZE * 2); 1857 - if (start >= region->end || (region->end - start < PAGE_SIZE * 2)) 1858 - continue; 1859 - 1860 - ret = iommu_map(domain->domain, start, page_to_phys(pages), PAGE_SIZE * 2, 1861 - IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, 1862 - GFP_KERNEL_ACCOUNT); 1863 - if (!ret) { 1864 - size_t unmapped = iommu_unmap(domain->domain, start, PAGE_SIZE); 1865 - 1866 - if (unmapped == PAGE_SIZE) 1867 - iommu_unmap(domain->domain, start + PAGE_SIZE, PAGE_SIZE); 1868 - else 1869 - domain->fgsp = true; 1870 - } 1871 - break; 1872 - } 1873 - 1874 - __free_pages(pages, order); 1875 - } 1876 - 1877 1836 static struct vfio_iommu_group *find_iommu_group(struct vfio_domain *domain, 1878 1837 struct iommu_group *iommu_group) 1879 1838 { ··· 2268 2313 goto out_domain; 2269 2314 } 2270 2315 } 2271 - 2272 - vfio_test_domain_fgsp(domain, &iova_copy); 2273 2316 2274 2317 /* replay mappings on new domains */ 2275 2318 ret = vfio_iommu_replay(iommu, domain);