Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

iommu/amd: Introduce gDomID-to-hDomID Mapping and handle parent domain invalidation

Each nested domain is assigned a guest domain ID (gDomID), which the guest OS
programs into the guest Device Table Entry (gDTE). For each gDomID, the driver
assigns a corresponding host domain ID (hDomID), which is programmed
into the host Device Table Entry (hDTE).

The hDomID is allocated during amd_iommu_alloc_domain_nested() and freed
during nested_domain_free() once the last nested domain using that gDomID
is released. The gDomID-to-hDomID mapping info
(struct guest_domain_mapping_info) is stored in a per-viommu xarray
(struct amd_iommu_viommu.gdomid_array), which is indexed by gDomID.

Note also that a nest parent domain can be shared among multiple
struct iommufd_viommu instances. Therefore, when the hypervisor invalidates
the nest parent domain, the AMD IOMMU command INVALIDATE_IOMMU_PAGES must be
issued for each hDomID in the gdomid_array of every vIOMMU that uses it.
This is handled by iommu_flush_pages_v1_hdom_ids(), which iterates through
struct protection_domain.viommu_list.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

authored by

Suravee Suthikulpanit and committed by
Joerg Roedel
757d2b1f 774180a7

+203
+23
drivers/iommu/amd/amd_iommu_types.h
··· 503 503 struct amd_iommu_viommu { 504 504 struct iommufd_viommu core; 505 505 struct protection_domain *parent; /* nest parent domain for this viommu */ 506 + struct list_head pdom_list; /* For protection_domain->viommu_list */ 507 + 508 + /* 509 + * Per-vIOMMU guest domain ID to host domain ID mapping. 510 + * Indexed by guest domain ID. 511 + */ 512 + struct xarray gdomid_array; 513 + }; 514 + 515 + /* 516 + * Contains guest domain ID mapping info, 517 + * which is stored in the struct xarray gdomid_array. 518 + */ 519 + struct guest_domain_mapping_info { 520 + refcount_t users; 521 + u32 hdom_id; /* Host domain ID */ 506 522 }; 507 523 508 524 /* ··· 527 511 struct nested_domain { 528 512 struct iommu_domain domain; /* generic domain handle used by iommu core code */ 529 513 u16 gdom_id; /* domain ID from gDTE */ 514 + struct guest_domain_mapping_info *gdom_info; 530 515 struct iommu_hwpt_amd_guest gdte; /* Guest vIOMMU DTE */ 531 516 struct amd_iommu_viommu *viommu; /* AMD hw-viommu this nested domain belong to */ 532 517 }; ··· 552 535 553 536 struct mmu_notifier mn; /* mmu notifier for the SVA domain */ 554 537 struct list_head dev_data_list; /* List of pdom_dev_data */ 538 + 539 + /* 540 + * Store reference to list of vIOMMUs, which use this protection domain. 541 + * This will be used to look up host domain ID when flushing this domain. 542 + */ 543 + struct list_head viommu_list; 555 544 }; 556 545 PT_IOMMU_CHECK_DOMAIN(struct protection_domain, iommu, domain); 557 546 PT_IOMMU_CHECK_DOMAIN(struct protection_domain, amdv1.iommu, domain);
+38
drivers/iommu/amd/iommu.c
··· 1543 1543 iommu_completion_wait(iommu); 1544 1544 } 1545 1545 1546 + static int iommu_flush_pages_v1_hdom_ids(struct protection_domain *pdom, u64 address, size_t size) 1547 + { 1548 + int ret = 0; 1549 + struct amd_iommu_viommu *aviommu; 1550 + 1551 + list_for_each_entry(aviommu, &pdom->viommu_list, pdom_list) { 1552 + unsigned long i; 1553 + struct guest_domain_mapping_info *gdom_info; 1554 + struct amd_iommu *iommu = container_of(aviommu->core.iommu_dev, 1555 + struct amd_iommu, iommu); 1556 + 1557 + xa_lock(&aviommu->gdomid_array); 1558 + xa_for_each(&aviommu->gdomid_array, i, gdom_info) { 1559 + struct iommu_cmd cmd; 1560 + 1561 + pr_debug("%s: iommu=%#x, hdom_id=%#x\n", __func__, 1562 + iommu->devid, gdom_info->hdom_id); 1563 + build_inv_iommu_pages(&cmd, address, size, gdom_info->hdom_id, 1564 + IOMMU_NO_PASID, false); 1565 + ret |= iommu_queue_command(iommu, &cmd); 1566 + } 1567 + xa_unlock(&aviommu->gdomid_array); 1568 + } 1569 + return ret; 1570 + } 1571 + 1546 1572 static void amd_iommu_flush_all(struct amd_iommu *iommu) 1547 1573 { 1548 1574 struct iommu_cmd cmd; ··· 1716 1690 */ 1717 1691 ret |= iommu_queue_command(pdom_iommu_info->iommu, &cmd); 1718 1692 } 1693 + 1694 + /* 1695 + * A domain w/ v1 table can be a nest parent, which can have 1696 + * multiple nested domains. Each nested domain has 1:1 mapping 1697 + * between gDomID and hDomID. Therefore, flush every hDomID 1698 + * associated to this nest parent domain. 1699 + * 1700 + * See drivers/iommu/amd/nested.c: amd_iommu_alloc_domain_nested() 1701 + */ 1702 + if (!list_empty(&pdom->viommu_list)) 1703 + ret |= iommu_flush_pages_v1_hdom_ids(pdom, address, size); 1719 1704 1720 1705 return ret; 1721 1706 } ··· 2545 2508 spin_lock_init(&domain->lock); 2546 2509 INIT_LIST_HEAD(&domain->dev_list); 2547 2510 INIT_LIST_HEAD(&domain->dev_data_list); 2511 + INIT_LIST_HEAD(&domain->viommu_list); 2548 2512 xa_init(&domain->iommu_array); 2549 2513 } 2550 2514
+31
drivers/iommu/amd/iommufd.c
··· 9 9 #include "amd_iommu.h" 10 10 #include "amd_iommu_types.h" 11 11 12 + static const struct iommufd_viommu_ops amd_viommu_ops; 13 + 12 14 void *amd_iommufd_hw_info(struct device *dev, u32 *length, u32 *type) 13 15 { 14 16 struct iommu_hw_info_amd *hwinfo; ··· 40 38 int amd_iommufd_viommu_init(struct iommufd_viommu *viommu, struct iommu_domain *parent, 41 39 const struct iommu_user_data *user_data) 42 40 { 41 + unsigned long flags; 43 42 struct protection_domain *pdom = to_pdomain(parent); 44 43 struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core); 45 44 45 + xa_init_flags(&aviommu->gdomid_array, XA_FLAGS_ALLOC1); 46 46 aviommu->parent = pdom; 47 + 48 + viommu->ops = &amd_viommu_ops; 49 + 50 + spin_lock_irqsave(&pdom->lock, flags); 51 + list_add(&aviommu->pdom_list, &pdom->viommu_list); 52 + spin_unlock_irqrestore(&pdom->lock, flags); 47 53 48 54 return 0; 49 55 } 56 + 57 + static void amd_iommufd_viommu_destroy(struct iommufd_viommu *viommu) 58 + { 59 + unsigned long flags; 60 + struct amd_iommu *iommu = container_of(viommu->iommu_dev, struct amd_iommu, iommu); 61 + struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core); 62 + struct protection_domain *pdom = aviommu->parent; 63 + 64 + spin_lock_irqsave(&pdom->lock, flags); 65 + list_del(&aviommu->pdom_list); 66 + spin_unlock_irqrestore(&pdom->lock, flags); 67 + xa_destroy(&aviommu->gdomid_array); 68 + } 69 + 70 + /* 71 + * See include/linux/iommufd.h 72 + * struct iommufd_viommu_ops - vIOMMU specific operations 73 + */ 74 + static const struct iommufd_viommu_ops amd_viommu_ops = { 75 + .destroy = amd_iommufd_viommu_destroy, 76 + };
+111
drivers/iommu/amd/nested.c
··· 6 6 #define dev_fmt(fmt) "AMD-Vi: " fmt 7 7 8 8 #include <linux/iommu.h> 9 + #include <linux/refcount.h> 9 10 #include <uapi/linux/iommufd.h> 10 11 11 12 #include "amd_iommu.h" ··· 59 58 return 0; 60 59 } 61 60 61 + static void *gdom_info_load_or_alloc_locked(struct xarray *xa, unsigned long index) 62 + { 63 + struct guest_domain_mapping_info *elm, *res; 64 + 65 + elm = xa_load(xa, index); 66 + if (elm) 67 + return elm; 68 + 69 + xa_unlock(xa); 70 + elm = kzalloc(sizeof(struct guest_domain_mapping_info), GFP_KERNEL); 71 + xa_lock(xa); 72 + if (!elm) 73 + return ERR_PTR(-ENOMEM); 74 + 75 + res = __xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL); 76 + if (xa_is_err(res)) 77 + res = ERR_PTR(xa_err(res)); 78 + 79 + if (res) { 80 + kfree(elm); 81 + return res; 82 + } 83 + 84 + refcount_set(&elm->users, 0); 85 + return elm; 86 + } 87 + 62 88 /* 63 89 * This function is assigned to struct iommufd_viommu_ops.alloc_domain_nested() 64 90 * during the call to struct iommu_ops.viommu_init(). ··· 96 68 { 97 69 int ret; 98 70 struct nested_domain *ndom; 71 + struct guest_domain_mapping_info *gdom_info; 99 72 struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core); 100 73 101 74 if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST) ··· 121 92 ndom->domain.type = IOMMU_DOMAIN_NESTED; 122 93 ndom->viommu = aviommu; 123 94 95 + /* 96 + * Normally, when a guest has multiple pass-through devices, 97 + * the IOMMU driver setup DTEs with the same stage-2 table and 98 + * use the same host domain ID (hDomId). In case of nested translation, 99 + * if the guest setup different stage-1 tables with same PASID, 100 + * IOMMU would use the same TLB tag. This will results in TLB 101 + * aliasing issue. 102 + * 103 + * The guest is assigning gDomIDs based on its own algorithm for managing 104 + * cache tags of (DomID, PASID). Within a single viommu, the nest parent domain 105 + * (w/ S2 table) is used by all DTEs. 
But we need to consistently map the gDomID 106 + * to a single hDomID. This is done using an xarray in the vIOMMU to 107 + * keep track of the gDomID mapping. When the S2 is changed, the INVALIDATE_IOMMU_PAGES 108 + * command must be issued for each hDomID in the xarray. 109 + */ 110 + xa_lock(&aviommu->gdomid_array); 111 + 112 + gdom_info = gdom_info_load_or_alloc_locked(&aviommu->gdomid_array, ndom->gdom_id); 113 + if (IS_ERR(gdom_info)) { 114 + xa_unlock(&aviommu->gdomid_array); 115 + ret = PTR_ERR(gdom_info); 116 + goto out_err; 117 + } 118 + 119 + /* Check if gDomID exist */ 120 + if (refcount_inc_not_zero(&gdom_info->users)) { 121 + ndom->gdom_info = gdom_info; 122 + xa_unlock(&aviommu->gdomid_array); 123 + 124 + pr_debug("%s: Found gdom_id=%#x, hdom_id=%#x\n", 125 + __func__, ndom->gdom_id, gdom_info->hdom_id); 126 + 127 + return &ndom->domain; 128 + } 129 + 130 + /* The gDomID does not exist. We allocate new hdom_id */ 131 + gdom_info->hdom_id = amd_iommu_pdom_id_alloc(); 132 + if (gdom_info->hdom_id <= 0) { 133 + __xa_cmpxchg(&aviommu->gdomid_array, 134 + ndom->gdom_id, gdom_info, NULL, GFP_ATOMIC); 135 + xa_unlock(&aviommu->gdomid_array); 136 + ret = -ENOSPC; 137 + goto out_err_gdom_info; 138 + } 139 + 140 + ndom->gdom_info = gdom_info; 141 + refcount_set(&gdom_info->users, 1); 142 + 143 + xa_unlock(&aviommu->gdomid_array); 144 + 145 + pr_debug("%s: Allocate gdom_id=%#x, hdom_id=%#x\n", 146 + __func__, ndom->gdom_id, gdom_info->hdom_id); 147 + 124 148 return &ndom->domain; 149 + 150 + out_err_gdom_info: 151 + kfree(gdom_info); 125 152 out_err: 126 153 kfree(ndom); 127 154 return ERR_PTR(ret); ··· 185 100 186 101 static void nested_domain_free(struct iommu_domain *dom) 187 102 { 103 + struct guest_domain_mapping_info *curr; 188 104 struct nested_domain *ndom = to_ndomain(dom); 105 + struct amd_iommu_viommu *aviommu = ndom->viommu; 189 106 107 + xa_lock(&aviommu->gdomid_array); 108 + 109 + if (!refcount_dec_and_test(&ndom->gdom_info->users)) { 110 + 
xa_unlock(&aviommu->gdomid_array); 111 + return; 112 + } 113 + 114 + /* 115 + * The refcount for the gdom_id to hdom_id mapping is zero. 116 + * It is now safe to remove the mapping. 117 + */ 118 + curr = __xa_cmpxchg(&aviommu->gdomid_array, ndom->gdom_id, 119 + ndom->gdom_info, NULL, GFP_ATOMIC); 120 + 121 + xa_unlock(&aviommu->gdomid_array); 122 + if (WARN_ON(!curr || xa_err(curr))) 123 + return; 124 + 125 + /* success */ 126 + pr_debug("%s: Free gdom_id=%#x, hdom_id=%#x\n", 127 + __func__, ndom->gdom_id, curr->hdom_id); 128 + 129 + amd_iommu_pdom_id_free(ndom->gdom_info->hdom_id); 130 + kfree(curr); 190 131 kfree(ndom); 191 132 } 192 133