···
 			1 - Bypass the IOMMU for DMA.
 			unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
 
+	iommu.debug_pagealloc=
+			[KNL,EARLY] When CONFIG_IOMMU_DEBUG_PAGEALLOC is set, this
+			parameter enables the feature at boot time. By default, it
+			is disabled and the system behaves the same way as a kernel
+			built without CONFIG_IOMMU_DEBUG_PAGEALLOC.
+			Format: { "0" | "1" }
+			0 - Sanitizer disabled.
+			1 - Sanitizer enabled, expect runtime overhead.
+
 	io7=		[HW] IO7 for Marvel-based Alpha systems
 			See comment before marvel_specify_io7 in
 			arch/alpha/kernel/core_marvel.c.
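For reference, a boot-time switch like this is usually parsed with early_param(). The sketch below only illustrates that plumbing; the variable and handler names are hypothetical and not taken from this series — only the parameter name and its "0"/"1" format come from the documentation above.

#include <linux/init.h>
#include <linux/kernel.h>

/* Hypothetical flag consulted by the runtime checks */
static bool iommu_debug_pagealloc_enabled;

static int __init iommu_debug_pagealloc_setup(char *str)
{
	/* Accepts "0" or "1" per the documented format */
	return kstrtobool(str, &iommu_debug_pagealloc_enabled);
}
early_param("iommu.debug_pagealloc", iommu_debug_pagealloc_setup);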
···
 
 	  Say Y here if you want to use the multimedia devices listed above.
 
+config IOMMU_DEBUG_PAGEALLOC
+	bool "Debug IOMMU mappings against page allocations"
+	depends on DEBUG_PAGEALLOC && IOMMU_API && PAGE_EXTENSION
+	help
+	  This enables a consistency check between the kernel page allocator and
+	  the IOMMU subsystem. It verifies that pages being allocated or freed
+	  are not currently mapped in any IOMMU domain.
+
+	  This helps detect DMA use-after-free bugs where a driver frees a page
+	  but forgets to unmap it from the IOMMU, potentially allowing a device
+	  to overwrite memory that the kernel has repurposed.
+
+	  These checks are best-effort and may not detect all problems.
+
+	  Due to performance overhead, this feature is disabled by default.
+	  You must enable "iommu.debug_pagealloc" from the kernel command
+	  line to activate the runtime checks.
+
+	  If unsure, say N.
 endif # IOMMU_SUPPORT
 
 source "drivers/iommu/generic_pt/Kconfig"
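The help text above describes a best-effort cross-check between the page allocator and IOMMU mappings. One plausible shape of such a check, sketched under assumptions, is to tag pages with a page_ext flag when they are mapped into an IOMMU domain and to warn if a still-tagged page reaches the allocator's free path; this also illustrates why the option depends on PAGE_EXTENSION. All names below are hypothetical and this is not the series' actual implementation.

#include <linux/mm.h>
#include <linux/page_ext.h>

/*
 * Hypothetical flag; a real implementation would reserve its own bit in
 * enum page_ext_flags rather than hard-coding 0.
 */
#define IOMMU_DEBUG_PAGE_MAPPED	0

static void iommu_debug_mark_page(struct page *page, bool mapped)
{
	struct page_ext *page_ext = page_ext_get(page);

	if (!page_ext)
		return;
	if (mapped)
		set_bit(IOMMU_DEBUG_PAGE_MAPPED, &page_ext->flags);
	else
		clear_bit(IOMMU_DEBUG_PAGE_MAPPED, &page_ext->flags);
	page_ext_put(page_ext);
}

static void iommu_debug_check_page_free(struct page *page)
{
	struct page_ext *page_ext = page_ext_get(page);

	if (!page_ext)
		return;
	/* A page reaching the free path must not still be IOMMU-mapped */
	WARN_ON(test_bit(IOMMU_DEBUG_PAGE_MAPPED, &page_ext->flags));
	page_ext_put(page_ext);
}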
···
 	  your BIOS for an option to enable it or if you have an IVRS ACPI
 	  table.
 
+config AMD_IOMMU_IOMMUFD
+	bool "Enable IOMMUFD features for AMD IOMMU (EXPERIMENTAL)"
+	depends on IOMMUFD
+	depends on AMD_IOMMU
+	help
+	  Support for IOMMUFD features intended to support virtual machines
+	  with accelerated virtual IOMMUs.
+
+	  Say Y here if you are doing development and testing on this feature.
+
 config AMD_IOMMU_DEBUGFS
 	bool "Enable AMD IOMMU internals in DebugFS"
 	depends on AMD_IOMMU && IOMMU_DEBUGFS
···
 struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
 struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid);
 
+void amd_iommu_set_dte_v1(struct iommu_dev_data *dev_data,
+			  struct protection_domain *domain, u16 domid,
+			  struct pt_iommu_amdv1_hw_info *pt_info,
+			  struct dev_table_entry *new);
+void amd_iommu_update_dte(struct amd_iommu *iommu,
+			  struct iommu_dev_data *dev_data,
+			  struct dev_table_entry *new);
+
+static inline void
+amd_iommu_make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry *new)
+{
+	struct dev_table_entry *initial_dte;
+	struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev);
+
+	/* All existing DTE must have V bit set */
+	new->data128[0] = DTE_FLAG_V;
+	new->data128[1] = 0;
+
+	/*
+	 * Restore cached persistent DTE bits, which can be set by information
+	 * in IVRS table. See set_dev_entry_from_acpi().
+	 */
+	initial_dte = amd_iommu_get_ivhd_dte_flags(iommu->pci_seg->id, dev_data->devid);
+	if (initial_dte) {
+		new->data128[0] |= initial_dte->data128[0];
+		new->data128[1] |= initial_dte->data128[1];
+	}
+}
+
+/* NESTED */
+struct iommu_domain *
+amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
+			      const struct iommu_user_data *user_data);
 #endif /* AMD_IOMMU_H */
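The two helpers declared above are shared with the nested-domain code so that every path builds and publishes DTEs the same way. A minimal usage illustration, mirroring how the driver's own clear_dte_entry() path in iommu.c uses them (the wrapper name here is only for illustration; it assumes the driver-internal headers such as "amd_iommu.h"):

static void example_reset_dte(struct amd_iommu *iommu,
			      struct iommu_dev_data *dev_data)
{
	struct dev_table_entry new = {};

	/* Start from a minimal valid DTE: V bit plus persistent IVRS bits */
	amd_iommu_make_clear_dte(dev_data, &new);

	/* Write the 256-bit DTE, clone aliases, flush the DTE and wait */
	amd_iommu_update_dte(iommu, dev_data, &new);
}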
drivers/iommu/amd/amd_iommu_types.h (+48, -2)
···
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
+#include <linux/iommufd.h>
 #include <linux/irqreturn.h>
 #include <linux/generic_pt/iommu.h>
+
+#include <uapi/linux/iommufd.h>
 
 /*
  * Maximum number of IOMMUs supported
···
 
 /* Extended Feature 2 Bits */
 #define FEATURE_SEVSNPIO_SUP		BIT_ULL(1)
+#define FEATURE_GCR3TRPMODE		BIT_ULL(3)
 #define FEATURE_SNPAVICSUP		GENMASK_ULL(7, 5)
 #define FEATURE_SNPAVICSUP_GAM(x) \
 	(FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1)
···
 #define CONTROL_EPH_EN		45
 #define CONTROL_XT_EN		50
 #define CONTROL_INTCAPXT_EN	51
+#define CONTROL_GCR3TRPMODE	58
 #define CONTROL_IRTCACHEDIS	59
 #define CONTROL_SNPAVIC_EN	61
···
 #define DTE_FLAG_V		BIT_ULL(0)
 #define DTE_FLAG_TV		BIT_ULL(1)
 #define DTE_FLAG_HAD		(3ULL << 7)
+#define DTE_MODE_MASK		GENMASK_ULL(11, 9)
+#define DTE_HOST_TRP		GENMASK_ULL(51, 12)
+#define DTE_FLAG_PPR		BIT_ULL(52)
 #define DTE_FLAG_GIOV		BIT_ULL(54)
 #define DTE_FLAG_GV		BIT_ULL(55)
 #define DTE_GLX			GENMASK_ULL(57, 56)
···
 
 #define DTE_FLAG_IOTLB		BIT_ULL(32)
 #define DTE_FLAG_MASK		(0x3ffULL << 32)
-#define DEV_DOMID_MASK		0xffffULL
+#define DTE_DOMID_MASK		GENMASK_ULL(15, 0)
 
 #define DTE_GCR3_14_12		GENMASK_ULL(60, 58)
 #define DTE_GCR3_30_15		GENMASK_ULL(31, 16)
···
 	u32 refcnt;	/* Count of attached dev/pasid per domain/IOMMU */
 };
 
+struct amd_iommu_viommu {
+	struct iommufd_viommu core;
+	struct protection_domain *parent; /* nest parent domain for this viommu */
+	struct list_head pdom_list;	  /* For protection_domain->viommu_list */
+
+	/*
+	 * Per-vIOMMU guest domain ID to host domain ID mapping.
+	 * Indexed by guest domain ID.
+	 */
+	struct xarray gdomid_array;
+};
+
+/*
+ * Contains guest domain ID mapping info,
+ * which is stored in the struct xarray gdomid_array.
+ */
+struct guest_domain_mapping_info {
+	refcount_t users;
+	u32 hdom_id;	/* Host domain ID */
+};
+
+/*
+ * Nested domain is specifically used for nested translation
+ */
+struct nested_domain {
+	struct iommu_domain domain;	/* generic domain handle used by iommu core code */
+	u16 gdom_id;			/* domain ID from gDTE */
+	struct guest_domain_mapping_info *gdom_info;
+	struct iommu_hwpt_amd_guest gdte;	/* Guest vIOMMU DTE */
+	struct amd_iommu_viommu *viommu;	/* AMD hw-viommu this nested domain belongs to */
+};
+
 /*
  * This structure contains generic data for IOMMU protection domains
  * independent of their use.
···
 
 	struct mmu_notifier mn;	/* mmu notifier for the SVA domain */
 	struct list_head dev_data_list; /* List of pdom_dev_data */
+
+	/*
+	 * Store reference to list of vIOMMUs, which use this protection domain.
+	 * This will be used to look up host domain ID when flushing this domain.
+	 */
+	struct list_head viommu_list;
 };
 PT_IOMMU_CHECK_DOMAIN(struct protection_domain, iommu, domain);
 PT_IOMMU_CHECK_DOMAIN(struct protection_domain, amdv1.iommu, domain);
···
 
 	u32 flags;
 	volatile u64 *cmd_sem;
-	atomic64_t cmd_sem_val;
+	u64 cmd_sem_val;
 	/*
 	 * Track physical address to directly use it in build_completion_wait()
 	 * and avoid adding any special checks and handling for kdump.
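To make the relationship between the new structures concrete: each struct amd_iommu_viommu keeps a gdomid_array indexed by guest domain ID, and its entries (struct guest_domain_mapping_info) carry the host domain ID that actually tags the IOTLB. A hypothetical lookup helper, for illustration only (assumes the driver-internal headers):

static int example_gdomid_to_hdomid(struct amd_iommu_viommu *aviommu,
				    u16 gdom_id, u32 *hdom_id)
{
	struct guest_domain_mapping_info *gdom_info;
	int ret = -ENOENT;

	xa_lock(&aviommu->gdomid_array);
	gdom_info = xa_load(&aviommu->gdomid_array, gdom_id);
	if (gdom_info && refcount_read(&gdom_info->users)) {
		/* This hDomID is what flushes for the nest parent must target */
		*hdom_id = gdom_info->hdom_id;
		ret = 0;
	}
	xa_unlock(&aviommu->gdomid_array);

	return ret;
}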
drivers/iommu/amd/init.c (+10, -2)
···
 		return;
 
 	iommu_feature_enable(iommu, CONTROL_GT_EN);
+
+	/*
+	 * This feature needs to be enabled prior to a call
+	 * to iommu_snp_enable(). Since this function is called
+	 * in early_enable_iommu(), it is safe to enable here.
+	 */
+	if (check_feature2(FEATURE_GCR3TRPMODE))
+		iommu_feature_enable(iommu, CONTROL_GCR3TRPMODE);
 }
 
 /* sets a specific bit in the device table entry. */
···
 	for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
 		old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
 		dte_v = FIELD_GET(DTE_FLAG_V, old_dev_tbl_entry->data[0]);
-		dom_id = FIELD_GET(DEV_DOMID_MASK, old_dev_tbl_entry->data[1]);
+		dom_id = FIELD_GET(DTE_DOMID_MASK, old_dev_tbl_entry->data[1]);
 
 		if (!dte_v || !dom_id)
 			continue;
···
 	iommu->pci_seg = pci_seg;
 
 	raw_spin_lock_init(&iommu->lock);
-	atomic64_set(&iommu->cmd_sem_val, 0);
+	iommu->cmd_sem_val = 0;
 
 	/* Add IOMMU to internal data structures */
 	list_add_tail(&iommu->list, &amd_iommu_list);
drivers/iommu/amd/iommu.c (+192, -119)
···4343#include <linux/generic_pt/iommu.h>44444545#include "amd_iommu.h"4646+#include "iommufd.h"4647#include "../irq_remapping.h"4748#include "../iommu-pages.h"4849···7675 struct iommu_dev_data *dev_data,7776 phys_addr_t top_paddr, unsigned int top_level);78777878+static int device_flush_dte(struct iommu_dev_data *dev_data);7979+7980static void amd_iommu_change_top(struct pt_iommu *iommu_table,8081 phys_addr_t top_paddr, unsigned int top_level);8182···8784static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain);8885static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,8986 bool enable);8787+8888+static void clone_aliases(struct amd_iommu *iommu, struct device *dev);8989+9090+static int iommu_completion_wait(struct amd_iommu *iommu);90919192/****************************************************************************9293 *···207200 }208201209202 spin_unlock_irqrestore(&dev_data->dte_lock, flags);203203+}204204+205205+void amd_iommu_update_dte(struct amd_iommu *iommu,206206+ struct iommu_dev_data *dev_data,207207+ struct dev_table_entry *new)208208+{209209+ update_dte256(iommu, dev_data, new);210210+ clone_aliases(iommu, dev_data->dev);211211+ device_flush_dte(dev_data);212212+ iommu_completion_wait(iommu);210213}211214212215static void get_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data,···12021185{12031186 int i = 0;1204118712051205- while (*iommu->cmd_sem != data && i < LOOP_TIMEOUT) {11881188+ /*11891189+ * cmd_sem holds a monotonically non-decreasing completion sequence11901190+ * number.11911191+ */11921192+ while ((__s64)(READ_ONCE(*iommu->cmd_sem) - data) < 0 &&11931193+ i < LOOP_TIMEOUT) {12061194 udelay(1);12071195 i += 1;12081196 }···14391417 return iommu_queue_command_sync(iommu, cmd, true);14401418}1441141914201420+static u64 get_cmdsem_val(struct amd_iommu *iommu)14211421+{14221422+ lockdep_assert_held(&iommu->lock);14231423+ return ++iommu->cmd_sem_val;14241424+}14251425+14421426/*14431427 * This function queues a completion wait command into the command14441428 * buffer of an IOMMU···14591431 if (!iommu->need_sync)14601432 return 0;1461143314621462- data = atomic64_inc_return(&iommu->cmd_sem_val);14631463- build_completion_wait(&cmd, iommu, data);14641464-14651434 raw_spin_lock_irqsave(&iommu->lock, flags);1466143514361436+ data = get_cmdsem_val(iommu);14371437+ build_completion_wait(&cmd, iommu, data);14381438+14671439 ret = __iommu_queue_command_sync(iommu, &cmd, false);14401440+ raw_spin_unlock_irqrestore(&iommu->lock, flags);14411441+14681442 if (ret)14691469- goto out_unlock;14431443+ return ret;1470144414711445 ret = wait_on_sem(iommu, data);14721472-14731473-out_unlock:14741474- raw_spin_unlock_irqrestore(&iommu->lock, flags);1475144614761447 return ret;14771448}···15471520 iommu_queue_command(iommu, &cmd);1548152115491522 iommu_completion_wait(iommu);15231523+}15241524+15251525+static int iommu_flush_pages_v1_hdom_ids(struct protection_domain *pdom, u64 address, size_t size)15261526+{15271527+ int ret = 0;15281528+ struct amd_iommu_viommu *aviommu;15291529+15301530+ list_for_each_entry(aviommu, &pdom->viommu_list, pdom_list) {15311531+ unsigned long i;15321532+ struct guest_domain_mapping_info *gdom_info;15331533+ struct amd_iommu *iommu = container_of(aviommu->core.iommu_dev,15341534+ struct amd_iommu, iommu);15351535+15361536+ xa_lock(&aviommu->gdomid_array);15371537+ xa_for_each(&aviommu->gdomid_array, i, gdom_info) {15381538+ struct iommu_cmd cmd;15391539+15401540+ pr_debug("%s: iommu=%#x, hdom_id=%#x\n", 
__func__,15411541+ iommu->devid, gdom_info->hdom_id);15421542+ build_inv_iommu_pages(&cmd, address, size, gdom_info->hdom_id,15431543+ IOMMU_NO_PASID, false);15441544+ ret |= iommu_queue_command(iommu, &cmd);15451545+ }15461546+ xa_unlock(&aviommu->gdomid_array);15471547+ }15481548+ return ret;15501549}1551155015521551static void amd_iommu_flush_all(struct amd_iommu *iommu)···17221669 */17231670 ret |= iommu_queue_command(pdom_iommu_info->iommu, &cmd);17241671 }16721672+16731673+ /*16741674+ * A domain w/ v1 table can be a nest parent, which can have16751675+ * multiple nested domains. Each nested domain has 1:1 mapping16761676+ * between gDomID and hDomID. Therefore, flush every hDomID16771677+ * associated to this nest parent domain.16781678+ *16791679+ * See drivers/iommu/amd/nested.c: amd_iommu_alloc_domain_nested()16801680+ */16811681+ if (!list_empty(&pdom->viommu_list))16821682+ ret |= iommu_flush_pages_v1_hdom_ids(pdom, address, size);1725168317261684 return ret;17271685}···20742010 return ret;20752011}2076201220772077-static void make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry *ptr,20782078- struct dev_table_entry *new)20792079-{20802080- /* All existing DTE must have V bit set */20812081- new->data128[0] = DTE_FLAG_V;20822082- new->data128[1] = 0;20832083-}20842084-20852013/*20862014 * Note:20872015 * The old value for GCR3 table and GPT have been cleared from caller.20882016 */20892089-static void set_dte_gcr3_table(struct amd_iommu *iommu,20902090- struct iommu_dev_data *dev_data,20912091- struct dev_table_entry *target)20172017+static void set_dte_gcr3_table(struct iommu_dev_data *dev_data,20182018+ struct dev_table_entry *new)20922019{20932020 struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;20942094- u64 gcr3;20212021+ u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);2095202220962096- if (!gcr3_info->gcr3_tbl)20972097- return;20232023+ new->data[0] |= DTE_FLAG_TV |20242024+ (dev_data->ppr ? DTE_FLAG_PPR : 0) |20252025+ (pdom_is_v2_pgtbl_mode(dev_data->domain) ? DTE_FLAG_GIOV : 0) |20262026+ DTE_FLAG_GV |20272027+ FIELD_PREP(DTE_GLX, gcr3_info->glx) |20282028+ FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12) |20292029+ DTE_FLAG_IR | DTE_FLAG_IW;2098203020992099- pr_debug("%s: devid=%#x, glx=%#x, gcr3_tbl=%#llx\n",21002100- __func__, dev_data->devid, gcr3_info->glx,21012101- (unsigned long long)gcr3_info->gcr3_tbl);21022102-21032103- gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);21042104-21052105- target->data[0] |= DTE_FLAG_GV |21062106- FIELD_PREP(DTE_GLX, gcr3_info->glx) |21072107- FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12);21082108- if (pdom_is_v2_pgtbl_mode(dev_data->domain))21092109- target->data[0] |= DTE_FLAG_GIOV;21102110-21112111- target->data[1] |= FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |21122112- FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);20312031+ new->data[1] |= FIELD_PREP(DTE_DOMID_MASK, dev_data->gcr3_info.domid) |20322032+ FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |20332033+ (dev_data->ats_enabled ? 
DTE_FLAG_IOTLB : 0) |20342034+ FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);2113203521142036 /* Guest page table can only support 4 and 5 levels */21152037 if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL)21162116- target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL);20382038+ new->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL);21172039 else21182118- target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL);20402040+ new->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL);20412041+}20422042+20432043+void amd_iommu_set_dte_v1(struct iommu_dev_data *dev_data,20442044+ struct protection_domain *domain, u16 domid,20452045+ struct pt_iommu_amdv1_hw_info *pt_info,20462046+ struct dev_table_entry *new)20472047+{20482048+ u64 host_pt_root = __sme_set(pt_info->host_pt_root);20492049+20502050+ /* Note Dirty tracking is used for v1 table only for now */20512051+ new->data[0] |= DTE_FLAG_TV |20522052+ FIELD_PREP(DTE_MODE_MASK, pt_info->mode) |20532053+ (domain->dirty_tracking ? DTE_FLAG_HAD : 0) |20542054+ FIELD_PREP(DTE_HOST_TRP, host_pt_root >> 12) |20552055+ DTE_FLAG_IR | DTE_FLAG_IW;20562056+20572057+ new->data[1] |= FIELD_PREP(DTE_DOMID_MASK, domid) |20582058+ (dev_data->ats_enabled ? DTE_FLAG_IOTLB : 0);20592059+}20602060+20612061+static void set_dte_v1(struct iommu_dev_data *dev_data,20622062+ struct protection_domain *domain, u16 domid,20632063+ phys_addr_t top_paddr, unsigned int top_level,20642064+ struct dev_table_entry *new)20652065+{20662066+ struct pt_iommu_amdv1_hw_info pt_info;20672067+20682068+ /*20692069+ * When updating the IO pagetable, the new top and level20702070+ * are provided as parameters. For other operations i.e.20712071+ * device attach, retrieve the current pagetable info20722072+ * via the IOMMU PT API.20732073+ */20742074+ if (top_paddr) {20752075+ pt_info.host_pt_root = top_paddr;20762076+ pt_info.mode = top_level + 1;20772077+ } else {20782078+ WARN_ON(top_paddr || top_level);20792079+ pt_iommu_amdv1_hw_info(&domain->amdv1, &pt_info);20802080+ }20812081+20822082+ amd_iommu_set_dte_v1(dev_data, domain, domid, &pt_info, new);20832083+}20842084+20852085+static void set_dte_passthrough(struct iommu_dev_data *dev_data,20862086+ struct protection_domain *domain,20872087+ struct dev_table_entry *new)20882088+{20892089+ new->data[0] |= DTE_FLAG_TV | DTE_FLAG_IR | DTE_FLAG_IW;20902090+20912091+ new->data[1] |= FIELD_PREP(DTE_DOMID_MASK, domain->id) |20922092+ (dev_data->ats_enabled) ? 
DTE_FLAG_IOTLB : 0;21192093}2120209421212095static void set_dte_entry(struct amd_iommu *iommu,21222096 struct iommu_dev_data *dev_data,21232097 phys_addr_t top_paddr, unsigned int top_level)21242098{21252125- u16 domid;21262099 u32 old_domid;21272127- struct dev_table_entry *initial_dte;21282100 struct dev_table_entry new = {};21292101 struct protection_domain *domain = dev_data->domain;21302102 struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;21312103 struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];21322132- struct pt_iommu_amdv1_hw_info pt_info;2133210421342134- make_clear_dte(dev_data, dte, &new);21052105+ amd_iommu_make_clear_dte(dev_data, &new);2135210621362136- if (gcr3_info && gcr3_info->gcr3_tbl)21372137- domid = dev_data->gcr3_info.domid;21382138- else {21392139- domid = domain->id;21072107+ old_domid = READ_ONCE(dte->data[1]) & DTE_DOMID_MASK;21082108+ if (gcr3_info->gcr3_tbl)21092109+ set_dte_gcr3_table(dev_data, &new);21102110+ else if (domain->domain.type == IOMMU_DOMAIN_IDENTITY)21112111+ set_dte_passthrough(dev_data, domain, &new);21122112+ else if ((domain->domain.type & __IOMMU_DOMAIN_PAGING) &&21132113+ domain->pd_mode == PD_MODE_V1)21142114+ set_dte_v1(dev_data, domain, domain->id, top_paddr, top_level, &new);21152115+ else21162116+ WARN_ON(true);2140211721412141- if (domain->domain.type & __IOMMU_DOMAIN_PAGING) {21422142- /*21432143- * When updating the IO pagetable, the new top and level21442144- * are provided as parameters. For other operations i.e.21452145- * device attach, retrieve the current pagetable info21462146- * via the IOMMU PT API.21472147- */21482148- if (top_paddr) {21492149- pt_info.host_pt_root = top_paddr;21502150- pt_info.mode = top_level + 1;21512151- } else {21522152- WARN_ON(top_paddr || top_level);21532153- pt_iommu_amdv1_hw_info(&domain->amdv1,21542154- &pt_info);21552155- }21562156-21572157- new.data[0] |= __sme_set(pt_info.host_pt_root) |21582158- (pt_info.mode & DEV_ENTRY_MODE_MASK)21592159- << DEV_ENTRY_MODE_SHIFT;21602160- }21612161- }21622162-21632163- new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW;21642164-21652165- /*21662166- * When SNP is enabled, we can only support TV=1 with non-zero domain ID.21672167- * This is prevented by the SNP-enable and IOMMU_DOMAIN_IDENTITY check in21682168- * do_iommu_domain_alloc().21692169- */21702170- WARN_ON(amd_iommu_snp_en && (domid == 0));21712171- new.data[0] |= DTE_FLAG_TV;21722172-21732173- if (dev_data->ppr)21742174- new.data[0] |= 1ULL << DEV_ENTRY_PPR;21752175-21762176- if (domain->dirty_tracking)21772177- new.data[0] |= DTE_FLAG_HAD;21782178-21792179- if (dev_data->ats_enabled)21802180- new.data[1] |= DTE_FLAG_IOTLB;21812181-21822182- old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;21832183- new.data[1] |= domid;21842184-21852185- /*21862186- * Restore cached persistent DTE bits, which can be set by information21872187- * in IVRS table. 
See set_dev_entry_from_acpi().21882188- */21892189- initial_dte = amd_iommu_get_ivhd_dte_flags(iommu->pci_seg->id, dev_data->devid);21902190- if (initial_dte) {21912191- new.data128[0] |= initial_dte->data128[0];21922192- new.data128[1] |= initial_dte->data128[1];21932193- }21942194-21952195- set_dte_gcr3_table(iommu, dev_data, &new);21962196-21972197- update_dte256(iommu, dev_data, &new);21182118+ amd_iommu_update_dte(iommu, dev_data, &new);2198211921992120 /*22002121 * A kdump kernel might be replacing a domain ID that was copied from···21972148static void clear_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data)21982149{21992150 struct dev_table_entry new = {};22002200- struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];2201215122022202- make_clear_dte(dev_data, dte, &new);22032203- update_dte256(iommu, dev_data, &new);21522152+ amd_iommu_make_clear_dte(dev_data, &new);21532153+ amd_iommu_update_dte(iommu, dev_data, &new);22042154}2205215522062156/* Update and flush DTE for the given device */···22112163 set_dte_entry(iommu, dev_data, 0, 0);22122164 else22132165 clear_dte_entry(iommu, dev_data);22142214-22152215- clone_aliases(iommu, dev_data->dev);22162216- device_flush_dte(dev_data);22172217- iommu_completion_wait(iommu);22182166}2219216722202168/*···25432499 spin_lock_init(&domain->lock);25442500 INIT_LIST_HEAD(&domain->dev_list);25452501 INIT_LIST_HEAD(&domain->dev_data_list);25022502+ INIT_LIST_HEAD(&domain->viommu_list);25462503 xa_init(&domain->iommu_array);25472504}25482505···28052760 return &domain->domain;28062761}2807276227632763+static inline bool is_nest_parent_supported(u32 flags)27642764+{27652765+ /* Only allow nest parent when these features are supported */27662766+ return check_feature(FEATURE_GT) &&27672767+ check_feature(FEATURE_GIOSUP) &&27682768+ check_feature2(FEATURE_GCR3TRPMODE);27692769+}27702770+28082771static struct iommu_domain *28092772amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,28102773 const struct iommu_user_data *user_data)···28202767{28212768 struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);28222769 const u32 supported_flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |28232823- IOMMU_HWPT_ALLOC_PASID;27702770+ IOMMU_HWPT_ALLOC_PASID |27712771+ IOMMU_HWPT_ALLOC_NEST_PARENT;2824277228252773 if ((flags & ~supported_flags) || user_data)28262774 return ERR_PTR(-EOPNOTSUPP);2827277528282776 switch (flags & supported_flags) {28292777 case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:28302830- /* Allocate domain with v1 page table for dirty tracking */28312831- if (!amd_iommu_hd_support(iommu))27782778+ case IOMMU_HWPT_ALLOC_NEST_PARENT:27792779+ case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_NEST_PARENT:27802780+ /*27812781+ * Allocate domain with v1 page table for dirty tracking27822782+ * and/or Nest parent.27832783+ */27842784+ if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&27852785+ !amd_iommu_hd_support(iommu))28322786 break;27872787+27882788+ if ((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) &&27892789+ !is_nest_parent_supported(flags))27902790+ break;27912791+28332792 return amd_iommu_domain_alloc_paging_v1(dev, flags);28342793 case IOMMU_HWPT_ALLOC_PASID:28352794 /* Allocate domain with v2 page table if IOMMU supports PASID. 
*/···3143307831443079const struct iommu_ops amd_iommu_ops = {31453080 .capable = amd_iommu_capable,30813081+ .hw_info = amd_iommufd_hw_info,31463082 .blocked_domain = &blocked_domain,31473083 .release_domain = &blocked_domain,31483084 .identity_domain = &identity_domain.domain,···31563090 .is_attach_deferred = amd_iommu_is_attach_deferred,31573091 .def_domain_type = amd_iommu_def_domain_type,31583092 .page_response = amd_iommu_page_response,30933093+ .get_viommu_size = amd_iommufd_get_viommu_size,30943094+ .viommu_init = amd_iommufd_viommu_init,31593095};3160309631613097#ifdef CONFIG_IRQ_REMAP···31823114 return;3183311531843116 build_inv_irt(&cmd, devid);31853185- data = atomic64_inc_return(&iommu->cmd_sem_val);31863186- build_completion_wait(&cmd2, iommu, data);3187311731883118 raw_spin_lock_irqsave(&iommu->lock, flags);31193119+ data = get_cmdsem_val(iommu);31203120+ build_completion_wait(&cmd2, iommu, data);31213121+31893122 ret = __iommu_queue_command_sync(iommu, &cmd, true);31903123 if (ret)31913191- goto out;31243124+ goto out_err;31923125 ret = __iommu_queue_command_sync(iommu, &cmd2, false);31933126 if (ret)31943194- goto out;31273127+ goto out_err;31283128+ raw_spin_unlock_irqrestore(&iommu->lock, flags);31293129+31953130 wait_on_sem(iommu, data);31963196-out:31313131+ return;31323132+31333133+out_err:31973134 raw_spin_unlock_irqrestore(&iommu->lock, flags);31983135}31993136···33123239 struct irq_remap_table *new_table = NULL;33133240 struct amd_iommu_pci_seg *pci_seg;33143241 unsigned long flags;33153315- int nid = iommu && iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;32423242+ int nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;33163243 u16 alias;3317324433183245 spin_lock_irqsave(&iommu_table_lock, flags);
···11+// SPDX-License-Identifier: GPL-2.0-only22+/*33+ * Copyright (C) 2025 Advanced Micro Devices, Inc.44+ */55+66+#define dev_fmt(fmt) "AMD-Vi: " fmt77+88+#include <linux/iommu.h>99+#include <linux/refcount.h>1010+#include <uapi/linux/iommufd.h>1111+1212+#include "amd_iommu.h"1313+1414+static const struct iommu_domain_ops nested_domain_ops;1515+1616+static inline struct nested_domain *to_ndomain(struct iommu_domain *dom)1717+{1818+ return container_of(dom, struct nested_domain, domain);1919+}2020+2121+/*2222+ * Validate guest DTE to make sure that configuration for host (v1)2323+ * and guest (v2) page tables are valid when allocating nested domain.2424+ */2525+static int validate_gdte_nested(struct iommu_hwpt_amd_guest *gdte)2626+{2727+ u32 gpt_level = FIELD_GET(DTE_GPT_LEVEL_MASK, gdte->dte[2]);2828+2929+ /* Must be zero: Mode, Host-TPR */3030+ if (FIELD_GET(DTE_MODE_MASK, gdte->dte[0]) != 0 ||3131+ FIELD_GET(DTE_HOST_TRP, gdte->dte[0]) != 0)3232+ return -EINVAL;3333+3434+ /* GCR3 TRP must be non-zero if V, GV is set */3535+ if (FIELD_GET(DTE_FLAG_V, gdte->dte[0]) == 1 &&3636+ FIELD_GET(DTE_FLAG_GV, gdte->dte[0]) == 1 &&3737+ FIELD_GET(DTE_GCR3_14_12, gdte->dte[0]) == 0 &&3838+ FIELD_GET(DTE_GCR3_30_15, gdte->dte[1]) == 0 &&3939+ FIELD_GET(DTE_GCR3_51_31, gdte->dte[1]) == 0)4040+ return -EINVAL;4141+4242+ /* Valid Guest Paging Mode values are 0 and 1 */4343+ if (gpt_level != GUEST_PGTABLE_4_LEVEL &&4444+ gpt_level != GUEST_PGTABLE_5_LEVEL)4545+ return -EINVAL;4646+4747+ /* GLX = 3 is reserved */4848+ if (FIELD_GET(DTE_GLX, gdte->dte[0]) == 3)4949+ return -EINVAL;5050+5151+ /*5252+ * We need to check host capability before setting5353+ * the Guest Paging Mode5454+ */5555+ if (gpt_level == GUEST_PGTABLE_5_LEVEL &&5656+ amd_iommu_gpt_level < PAGE_MODE_5_LEVEL)5757+ return -EOPNOTSUPP;5858+5959+ return 0;6060+}6161+6262+static void *gdom_info_load_or_alloc_locked(struct xarray *xa, unsigned long index)6363+{6464+ struct guest_domain_mapping_info *elm, *res;6565+6666+ elm = xa_load(xa, index);6767+ if (elm)6868+ return elm;6969+7070+ xa_unlock(xa);7171+ elm = kzalloc(sizeof(struct guest_domain_mapping_info), GFP_KERNEL);7272+ xa_lock(xa);7373+ if (!elm)7474+ return ERR_PTR(-ENOMEM);7575+7676+ res = __xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);7777+ if (xa_is_err(res))7878+ res = ERR_PTR(xa_err(res));7979+8080+ if (res) {8181+ kfree(elm);8282+ return res;8383+ }8484+8585+ refcount_set(&elm->users, 0);8686+ return elm;8787+}8888+8989+/*9090+ * This function is assigned to struct iommufd_viommu_ops.alloc_domain_nested()9191+ * during the call to struct iommu_ops.viommu_init().9292+ */9393+struct iommu_domain *9494+amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,9595+ const struct iommu_user_data *user_data)9696+{9797+ int ret;9898+ struct nested_domain *ndom;9999+ struct guest_domain_mapping_info *gdom_info;100100+ struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);101101+102102+ if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)103103+ return ERR_PTR(-EOPNOTSUPP);104104+105105+ ndom = kzalloc(sizeof(*ndom), GFP_KERNEL);106106+ if (!ndom)107107+ return ERR_PTR(-ENOMEM);108108+109109+ ret = iommu_copy_struct_from_user(&ndom->gdte, user_data,110110+ IOMMU_HWPT_DATA_AMD_GUEST,111111+ dte);112112+ if (ret)113113+ goto out_err;114114+115115+ ret = validate_gdte_nested(&ndom->gdte);116116+ if (ret)117117+ goto out_err;118118+119119+ ndom->gdom_id = FIELD_GET(DTE_DOMID_MASK, ndom->gdte.dte[1]);120120+ ndom->domain.ops = 
&nested_domain_ops;121121+ ndom->domain.type = IOMMU_DOMAIN_NESTED;122122+ ndom->viommu = aviommu;123123+124124+ /*125125+ * Normally, when a guest has multiple pass-through devices,126126+ * the IOMMU driver setup DTEs with the same stage-2 table and127127+ * use the same host domain ID (hDomId). In case of nested translation,128128+ * if the guest setup different stage-1 tables with same PASID,129129+ * IOMMU would use the same TLB tag. This will results in TLB130130+ * aliasing issue.131131+ *132132+ * The guest is assigning gDomIDs based on its own algorithm for managing133133+ * cache tags of (DomID, PASID). Within a single viommu, the nest parent domain134134+ * (w/ S2 table) is used by all DTEs. But we need to consistently map the gDomID135135+ * to a single hDomID. This is done using an xarray in the vIOMMU to136136+ * keep track of the gDomID mapping. When the S2 is changed, the INVALIDATE_IOMMU_PAGES137137+ * command must be issued for each hDomID in the xarray.138138+ */139139+ xa_lock(&aviommu->gdomid_array);140140+141141+ gdom_info = gdom_info_load_or_alloc_locked(&aviommu->gdomid_array, ndom->gdom_id);142142+ if (IS_ERR(gdom_info)) {143143+ xa_unlock(&aviommu->gdomid_array);144144+ ret = PTR_ERR(gdom_info);145145+ goto out_err;146146+ }147147+148148+ /* Check if gDomID exist */149149+ if (refcount_inc_not_zero(&gdom_info->users)) {150150+ ndom->gdom_info = gdom_info;151151+ xa_unlock(&aviommu->gdomid_array);152152+153153+ pr_debug("%s: Found gdom_id=%#x, hdom_id=%#x\n",154154+ __func__, ndom->gdom_id, gdom_info->hdom_id);155155+156156+ return &ndom->domain;157157+ }158158+159159+ /* The gDomID does not exist. We allocate new hdom_id */160160+ gdom_info->hdom_id = amd_iommu_pdom_id_alloc();161161+ if (gdom_info->hdom_id <= 0) {162162+ __xa_cmpxchg(&aviommu->gdomid_array,163163+ ndom->gdom_id, gdom_info, NULL, GFP_ATOMIC);164164+ xa_unlock(&aviommu->gdomid_array);165165+ ret = -ENOSPC;166166+ goto out_err_gdom_info;167167+ }168168+169169+ ndom->gdom_info = gdom_info;170170+ refcount_set(&gdom_info->users, 1);171171+172172+ xa_unlock(&aviommu->gdomid_array);173173+174174+ pr_debug("%s: Allocate gdom_id=%#x, hdom_id=%#x\n",175175+ __func__, ndom->gdom_id, gdom_info->hdom_id);176176+177177+ return &ndom->domain;178178+179179+out_err_gdom_info:180180+ kfree(gdom_info);181181+out_err:182182+ kfree(ndom);183183+ return ERR_PTR(ret);184184+}185185+186186+static void set_dte_nested(struct amd_iommu *iommu, struct iommu_domain *dom,187187+ struct iommu_dev_data *dev_data, struct dev_table_entry *new)188188+{189189+ struct protection_domain *parent;190190+ struct nested_domain *ndom = to_ndomain(dom);191191+ struct iommu_hwpt_amd_guest *gdte = &ndom->gdte;192192+ struct pt_iommu_amdv1_hw_info pt_info;193193+194194+ /*195195+ * The nest parent domain is attached during the call to the196196+ * struct iommu_ops.viommu_init(), which will be stored as part197197+ * of the struct amd_iommu_viommu.parent.198198+ */199199+ if (WARN_ON(!ndom->viommu || !ndom->viommu->parent))200200+ return;201201+202202+ parent = ndom->viommu->parent;203203+ amd_iommu_make_clear_dte(dev_data, new);204204+205205+ /* Retrieve the current pagetable info via the IOMMU PT API. 
*/206206+ pt_iommu_amdv1_hw_info(&parent->amdv1, &pt_info);207207+208208+ /*209209+ * Use domain ID from nested domain to program DTE.210210+ * See amd_iommu_alloc_domain_nested().211211+ */212212+ amd_iommu_set_dte_v1(dev_data, parent, ndom->gdom_info->hdom_id,213213+ &pt_info, new);214214+215215+ /* GV is required for nested page table */216216+ new->data[0] |= DTE_FLAG_GV;217217+218218+ /* Guest PPR */219219+ new->data[0] |= gdte->dte[0] & DTE_FLAG_PPR;220220+221221+ /* Guest translation stuff */222222+ new->data[0] |= gdte->dte[0] & (DTE_GLX | DTE_FLAG_GIOV);223223+224224+ /* GCR3 table */225225+ new->data[0] |= gdte->dte[0] & DTE_GCR3_14_12;226226+ new->data[1] |= gdte->dte[1] & (DTE_GCR3_30_15 | DTE_GCR3_51_31);227227+228228+ /* Guest paging mode */229229+ new->data[2] |= gdte->dte[2] & DTE_GPT_LEVEL_MASK;230230+}231231+232232+static int nested_attach_device(struct iommu_domain *dom, struct device *dev,233233+ struct iommu_domain *old)234234+{235235+ struct dev_table_entry new = {0};236236+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);237237+ struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);238238+ int ret = 0;239239+240240+ /*241241+ * Needs to make sure PASID is not enabled242242+ * for this attach path.243243+ */244244+ if (WARN_ON(dev_data->pasid_enabled))245245+ return -EINVAL;246246+247247+ mutex_lock(&dev_data->mutex);248248+249249+ set_dte_nested(iommu, dom, dev_data, &new);250250+251251+ amd_iommu_update_dte(iommu, dev_data, &new);252252+253253+ mutex_unlock(&dev_data->mutex);254254+255255+ return ret;256256+}257257+258258+static void nested_domain_free(struct iommu_domain *dom)259259+{260260+ struct guest_domain_mapping_info *curr;261261+ struct nested_domain *ndom = to_ndomain(dom);262262+ struct amd_iommu_viommu *aviommu = ndom->viommu;263263+264264+ xa_lock(&aviommu->gdomid_array);265265+266266+ if (!refcount_dec_and_test(&ndom->gdom_info->users)) {267267+ xa_unlock(&aviommu->gdomid_array);268268+ return;269269+ }270270+271271+ /*272272+ * The refcount for the gdom_id to hdom_id mapping is zero.273273+ * It is now safe to remove the mapping.274274+ */275275+ curr = __xa_cmpxchg(&aviommu->gdomid_array, ndom->gdom_id,276276+ ndom->gdom_info, NULL, GFP_ATOMIC);277277+278278+ xa_unlock(&aviommu->gdomid_array);279279+ if (WARN_ON(!curr || xa_err(curr)))280280+ return;281281+282282+ /* success */283283+ pr_debug("%s: Free gdom_id=%#x, hdom_id=%#x\n",284284+ __func__, ndom->gdom_id, curr->hdom_id);285285+286286+ amd_iommu_pdom_id_free(ndom->gdom_info->hdom_id);287287+ kfree(curr);288288+ kfree(ndom);289289+}290290+291291+static const struct iommu_domain_ops nested_domain_ops = {292292+ .attach_dev = nested_attach_device,293293+ .free = nested_domain_free,294294+};
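gdom_info_load_or_alloc_locked() above follows a common xarray pattern: look up under xa_lock, drop the lock for a sleeping allocation, then use __xa_cmpxchg() to install the entry only if the slot is still empty. A generic, self-contained sketch of the same pattern (names are illustrative; GFP_ATOMIC is used for the cmpxchg here because the lock is held at that point):

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/xarray.h>

/* Caller holds xa_lock(xa); the lock is dropped around the allocation. */
static void *load_or_alloc_locked(struct xarray *xa, unsigned long index,
				  size_t size)
{
	void *elm, *res;

	elm = xa_load(xa, index);
	if (elm)
		return elm;			/* slot already populated */

	xa_unlock(xa);				/* cannot sleep under xa_lock */
	elm = kzalloc(size, GFP_KERNEL);
	xa_lock(xa);
	if (!elm)
		return ERR_PTR(-ENOMEM);

	/* Install only if the slot is still empty after the lock was dropped */
	res = __xa_cmpxchg(xa, index, NULL, elm, GFP_ATOMIC);
	if (xa_is_err(res)) {
		kfree(elm);
		return ERR_PTR(xa_err(res));
	}
	if (res) {
		kfree(elm);			/* lost the race; reuse the winner */
		return res;
	}
	return elm;
}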
drivers/iommu/arm/Kconfig (-1)
···
 
 config TEGRA241_CMDQV
 	bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3"
-	depends on ACPI
 	help
 	  Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The
 	  CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues
···
 	 * config bit here base this off the EATS value in the STE. If the EATS
 	 * is set then the VM must generate ATC flushes.
 	 */
-	state.disable_ats = !nested_domain->enable_ats;
+	if (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0])) ==
+	    STRTAB_STE_0_CFG_S1_TRANS)
+		state.disable_ats = !nested_domain->enable_ats;
 	ret = arm_smmu_attach_prepare(&state, domain);
 	if (ret) {
 		mutex_unlock(&arm_smmu_asid_lock);
···487487 */488488static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)489489{490490- int val;491491-492490 /*493493- * We can try to avoid the cmpxchg() loop by simply incrementing the494494- * lock counter. When held in exclusive state, the lock counter is set495495- * to INT_MIN so these increments won't hurt as the value will remain496496- * negative.491491+ * When held in exclusive state, the lock counter is set to INT_MIN492492+ * so these increments won't hurt as the value will remain negative.493493+ * The increment will also signal the exclusive locker that there are494494+ * shared waiters.497495 */498496 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)499497 return;500498501501- do {502502- val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);503503- } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);499499+ /*500500+ * Someone else is holding the lock in exclusive state, so wait501501+ * for them to finish. Since we already incremented the lock counter,502502+ * no exclusive lock can be acquired until we finish. We don't need503503+ * the return value since we only care that the exclusive lock is504504+ * released (i.e. the lock counter is non-negative).505505+ * Once the exclusive locker releases the lock, the sign bit will506506+ * be cleared and our increment will make the lock counter positive,507507+ * allowing us to proceed.508508+ */509509+ atomic_cond_read_relaxed(&cmdq->lock, VAL > 0);504510}505511506512static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)···533527 __ret; \534528})535529530530+/*531531+ * Only clear the sign bit when releasing the exclusive lock this will532532+ * allow any shared_lock() waiters to proceed without the possibility533533+ * of entering the exclusive lock in a tight loop.534534+ */536535#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \537536({ \538538- atomic_set_release(&cmdq->lock, 0); \537537+ atomic_fetch_andnot_release(INT_MIN, &cmdq->lock); \539538 local_irq_restore(flags); \540539})541540···10931082}10941083EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);1095108410851085+VISIBLE_IF_KUNIT10861086+void arm_smmu_get_ste_update_safe(const __le64 *cur, const __le64 *target,10871087+ __le64 *safe_bits)10881088+{10891089+ const __le64 eats_s1chk =10901090+ FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_S1CHK);10911091+ const __le64 eats_trans =10921092+ FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);10931093+10941094+ /*10951095+ * When an STE changes EATS_TRANS, the sequencing code in the attach10961096+ * logic already will have the PCI cap for ATS disabled. Thus at this10971097+ * moment we can expect that the device will not generate ATS queries10981098+ * and so we don't care about the sequencing of EATS. The purpose of10991099+ * EATS_TRANS is to protect the system from hostile untrusted devices11001100+ * that issue ATS when the PCI config space is disabled. However, if11011101+ * EATS_TRANS is being changed, then we must have already trusted the11021102+ * device as the EATS_TRANS security block is being disabled.11031103+ *11041104+ * Note: now the EATS_TRANS update is moved to the first entry_set().11051105+ * Changing S2S and EATS might transiently result in S2S=1 and EATS=111061106+ * which is a bad STE (see "5.2 Stream Table Entry"). In such a case,11071107+ * we can't do a hitless update. 
Also, it should not be added to the11081108+ * safe bits with STRTAB_STE_1_EATS_S1CHK, because EATS=0b11 would be11091109+ * effectively an errant 0b00 configuration.11101110+ */11111111+ if (!((cur[1] | target[1]) & cpu_to_le64(eats_s1chk)) &&11121112+ !((cur[2] | target[2]) & cpu_to_le64(STRTAB_STE_2_S2S)))11131113+ safe_bits[1] |= cpu_to_le64(eats_trans);11141114+11151115+ /*11161116+ * MEV does not meaningfully impact the operation of the HW, it only11171117+ * changes how many fault events are generated, thus we can relax it11181118+ * when computing the ordering. The spec notes the device can act like11191119+ * MEV=1 anyhow:11201120+ *11211121+ * Note: Software must expect, and be able to deal with, coalesced11221122+ * fault records even when MEV == 0.11231123+ */11241124+ safe_bits[1] |= cpu_to_le64(STRTAB_STE_1_MEV);11251125+}11261126+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_update_safe);11271127+10961128/*10971129 * Figure out if we can do a hitless update of entry to become target. Returns a10981130 * bit mask where 1 indicates that qword needs to be set disruptively.···11481094{11491095 __le64 target_used[NUM_ENTRY_QWORDS] = {};11501096 __le64 cur_used[NUM_ENTRY_QWORDS] = {};10971097+ __le64 safe[NUM_ENTRY_QWORDS] = {};11511098 u8 used_qword_diff = 0;11521099 unsigned int i;1153110011541101 writer->ops->get_used(entry, cur_used);11551102 writer->ops->get_used(target, target_used);11031103+ if (writer->ops->get_update_safe)11041104+ writer->ops->get_update_safe(entry, target, safe);1156110511571106 for (i = 0; i != NUM_ENTRY_QWORDS; i++) {11071107+ /*11081108+ * Safe is only used for bits that are used by both entries,11091109+ * otherwise it is sequenced according to the unused entry.11101110+ */11111111+ safe[i] &= target_used[i] & cur_used[i];11121112+11581113 /*11591114 * Check that masks are up to date, the make functions are not11601115 * allowed to set a bit to 1 if the used function doesn't say it···11721109 WARN_ON_ONCE(target[i] & ~target_used[i]);1173111011741111 /* Bits can change because they are not currently being used */11121112+ cur_used[i] &= ~safe[i];11751113 unused_update[i] = (entry[i] & cur_used[i]) |11761114 (target[i] & ~cur_used[i]);11771115 /*···11851121 return used_qword_diff;11861122}1187112311881188-static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,11241124+static void entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,11891125 const __le64 *target, unsigned int start,11901126 unsigned int len)11911127{···1201113712021138 if (changed)12031139 writer->ops->sync(writer);12041204- return changed;12051140}1206114112071142/*···12701207 entry_set(writer, entry, target, 0, 1);12711208 } else {12721209 /*12731273- * No inuse bit changed. Sanity check that all unused bits are 012741274- * in the entry. 
The target was already sanity checked by12751275- * compute_qword_diff().12101210+ * No inuse bit changed, though safe bits may have changed.12761211 */12771277- WARN_ON_ONCE(12781278- entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));12121212+ entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);12791213 }12801214}12811215EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);···16031543static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {16041544 .sync = arm_smmu_ste_writer_sync_entry,16051545 .get_used = arm_smmu_get_ste_used,15461546+ .get_update_safe = arm_smmu_get_ste_update_safe,16061547};1607154816081549static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,···26122551 ARM_SMMU_FEAT_VAX) ? 52 : 48;2613255226142553 pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);26152615- pgtbl_cfg.oas = smmu->ias;25542554+ pgtbl_cfg.oas = smmu->oas;26162555 if (enable_dirty)26172556 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;26182557 fmt = ARM_64_LPAE_S1;···26222561 case ARM_SMMU_DOMAIN_S2:26232562 if (enable_dirty)26242563 return -EOPNOTSUPP;26252625- pgtbl_cfg.ias = smmu->ias;25642564+ pgtbl_cfg.ias = smmu->oas;26262565 pgtbl_cfg.oas = smmu->oas;26272566 fmt = ARM_64_LPAE_S2;26282567 finalise_stage_fn = arm_smmu_domain_finalise_s2;···31863125 struct arm_smmu_domain *smmu_domain, ioasid_t pasid,31873126 struct arm_smmu_cd *cd, struct iommu_domain *old)31883127{31893189- struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);31283128+ struct iommu_domain *sid_domain =31293129+ iommu_driver_get_domain_for_dev(master->dev);31903130 struct arm_smmu_attach_state state = {31913131 .master = master,31923132 .ssid = pasid,···32533191 */32543192 if (!arm_smmu_ssids_in_use(&master->cd_table)) {32553193 struct iommu_domain *sid_domain =32563256- iommu_get_domain_for_dev(master->dev);31943194+ iommu_driver_get_domain_for_dev(master->dev);3257319532583196 if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||32593197 sid_domain->type == IOMMU_DOMAIN_BLOCKED)···44574395 }4458439644594397 /* We only support the AArch64 table format at present */44604460- switch (FIELD_GET(IDR0_TTF, reg)) {44614461- case IDR0_TTF_AARCH32_64:44624462- smmu->ias = 40;44634463- fallthrough;44644464- case IDR0_TTF_AARCH64:44654465- break;44664466- default:43984398+ if (!(FIELD_GET(IDR0_TTF, reg) & IDR0_TTF_AARCH64)) {44674399 dev_err(smmu->dev, "AArch64 table format not supported!\n");44684400 return -ENXIO;44694401 }···45704514 dev_warn(smmu->dev,45714515 "failed to set DMA mask for table walker\n");4572451645734573- smmu->ias = max(smmu->ias, smmu->oas);45744574-45754517 if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&45764518 (smmu->features & ARM_SMMU_FEAT_TRANS_S2))45774519 smmu->features |= ARM_SMMU_FEAT_NESTING;···45794525 if (arm_smmu_sva_supported(smmu))45804526 smmu->features |= ARM_SMMU_FEAT_SVA;4581452745824582- dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",45834583- smmu->ias, smmu->oas, smmu->features);45284528+ dev_info(smmu->dev, "oas %lu-bit (features 0x%08x)\n",45294529+ smmu->oas, smmu->features);45844530 return 0;45854531}45324532+45334533+#ifdef CONFIG_TEGRA241_CMDQV45344534+static void tegra_cmdqv_dt_probe(struct device_node *smmu_node,45354535+ struct arm_smmu_device *smmu)45364536+{45374537+ struct platform_device *pdev;45384538+ struct device_node *np;45394539+45404540+ np = of_parse_phandle(smmu_node, "nvidia,cmdqv", 0);45414541+ if (!np)45424542+ return;45434543+45444544+ /* Tegra241 CMDQV driver is responsible for put_device() */45454545+ 
pdev = of_find_device_by_node(np);45464546+ of_node_put(np);45474547+ if (!pdev)45484548+ return;45494549+45504550+ smmu->impl_dev = &pdev->dev;45514551+ smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;45524552+ dev_dbg(smmu->dev, "found companion CMDQV device: %s\n",45534553+ dev_name(smmu->impl_dev));45544554+}45554555+#else45564556+static void tegra_cmdqv_dt_probe(struct device_node *smmu_node,45574557+ struct arm_smmu_device *smmu)45584558+{45594559+}45604560+#endif4586456145874562#ifdef CONFIG_ACPI45884563#ifdef CONFIG_TEGRA241_CMDQV···46254542 adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);46264543 if (adev) {46274544 /* Tegra241 CMDQV driver is responsible for put_device() */46284628- smmu->impl_dev = &adev->dev;45454545+ smmu->impl_dev = get_device(acpi_get_first_physical_node(adev));46294546 smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;46304547 dev_info(smmu->dev, "found companion CMDQV device: %s\n",46314548 dev_name(smmu->impl_dev));45494549+ acpi_dev_put(adev);46324550 }46334551 kfree(uid);46344552}···4717463347184634 if (of_dma_is_coherent(dev->of_node))47194635 smmu->features |= ARM_SMMU_FEAT_COHERENCY;46364636+46374637+ if (of_device_is_compatible(dev->of_node, "nvidia,tegra264-smmu"))46384638+ tegra_cmdqv_dt_probe(dev->of_node, smmu);4720463947214640 return ret;47224641}
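The arm_smmu_cmdq_shared_lock()/unlock() changes above rely on the sign bit of the lock counter belonging to the exclusive locker: shared lockers only ever increment, and the exclusive unlock now clears just the sign bit so those queued increments survive. A minimal standalone model of that discipline, written under assumptions (this is not the driver's code):

#include <linux/atomic.h>
#include <linux/limits.h>

static void model_shared_lock(atomic_t *lock)
{
	/* Fast path: no exclusive holder, our increment already holds the lock */
	if (atomic_fetch_inc_relaxed(lock) >= 0)
		return;
	/*
	 * An exclusive holder owns the sign bit. Our increment is already
	 * recorded, so just wait for the sign bit to clear; the counter then
	 * becomes positive because of that increment.
	 */
	atomic_cond_read_relaxed(lock, VAL > 0);
}

static void model_shared_unlock(atomic_t *lock)
{
	(void)atomic_dec_return_release(lock);
}

static bool model_exclusive_trylock(atomic_t *lock)
{
	/* Succeeds only when there are no shared holders at all */
	return atomic_cmpxchg_acquire(lock, 0, INT_MIN) == 0;
}

static void model_exclusive_unlock(atomic_t *lock)
{
	/* Clear only the sign bit; increments from shared waiters survive */
	atomic_fetch_andnot_release(INT_MIN, lock);
}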
···
 	 * Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to
 	 * control this. For now if the tables use sme_set then so do the ptes.
 	 */
-	if (pt_feature(common, PT_FEAT_AMDV1_ENCRYPT_TABLES))
+	if (pt_feature(common, PT_FEAT_AMDV1_ENCRYPT_TABLES) &&
+	    !(iommu_prot & IOMMU_MMIO))
 		pte = __sme_set(pte);
 
 	attrs->descriptor_bits = pte;
drivers/iommu/generic_pt/fmt/x86_64.h (+2, -1)
···
 	 * Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to
 	 * control this. For now if the tables use sme_set then so do the ptes.
 	 */
-	if (pt_feature(common, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
+	if (pt_feature(common, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES) &&
+	    !(iommu_prot & IOMMU_MMIO))
 		pte = __sme_set(pte);
 
 	attrs->descriptor_bits = pte;
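Both hunks above (amdv1.h and this x86_64.h one) make the same change: the SME encryption bit is no longer ORed into PTEs for IOMMU_MMIO mappings, presumably because MMIO targets are not encrypted DRAM. A rough standalone illustration of the resulting logic (the wrapper is hypothetical; __sme_set() simply ORs in sme_me_mask):

#include <linux/iommu.h>
#include <linux/mem_encrypt.h>

static u64 example_finish_pte(u64 pte, unsigned int iommu_prot,
			      bool encrypt_tables)
{
	/* MMIO targets are left unencrypted; only normal memory gets the C-bit */
	if (encrypt_tables && !(iommu_prot & IOMMU_MMIO))
		pte = __sme_set(pte);	/* i.e. pte |= sme_me_mask */
	return pte;
}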
drivers/iommu/generic_pt/iommu_pt.h (+1, -2)
···
 	 * Note that the sync frees the gather's free list, so we must
 	 * not have any pages on that list that are covered by iova/len
 	 */
-	} else if (pt_feature(common, PT_FEAT_FLUSH_RANGE)) {
-		iommu_iotlb_gather_add_range(iotlb_gather, iova, len);
 	}
 
+	iommu_iotlb_gather_add_range(iotlb_gather, iova, len);
 	iommu_pages_list_splice(free_list, &iotlb_gather->freelist);
 }
 
drivers/iommu/intel/cache.c (+8, -1)
···
 	qi_batch_increment_index(iommu, batch);
 }
 
+static bool intel_domain_use_piotlb(struct dmar_domain *domain)
+{
+	return domain->domain.type == IOMMU_DOMAIN_SVA ||
+	       domain->domain.type == IOMMU_DOMAIN_NESTED ||
+	       intel_domain_is_fs_paging(domain);
+}
+
 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
 				  unsigned long addr, unsigned long pages,
 				  unsigned long mask, int ih)
···
 	struct intel_iommu *iommu = tag->iommu;
 	u64 type = DMA_TLB_PSI_FLUSH;
 
-	if (intel_domain_is_fs_paging(domain)) {
+	if (intel_domain_use_piotlb(domain)) {
 		qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
 				    pages, ih, domain->qi_batch);
 		return;
···
 
 static inline void context_set_present(struct context_entry *context)
 {
-	context->lo |= 1;
+	u64 val;
+
+	dma_wmb();
+	val = READ_ONCE(context->lo) | 1;
+	WRITE_ONCE(context->lo, val);
+}
+
+/*
+ * Clear the Present (P) bit (bit 0) of a context table entry. This initiates
+ * the transition of the entry's ownership from hardware to software. The
+ * caller is responsible for fulfilling the invalidation handshake recommended
+ * by the VT-d spec, Section 6.5.3.3 (Guidance to Software for Invalidations).
+ */
+static inline void context_clear_present(struct context_entry *context)
+{
+	u64 val;
+
+	val = READ_ONCE(context->lo) & GENMASK_ULL(63, 1);
+	WRITE_ONCE(context->lo, val);
+	dma_wmb();
 }
 
 static inline void context_set_fault_enable(struct context_entry *context)
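The barrier placement above differs on purpose: when setting Present, dma_wmb() orders all earlier field writes before the P bit becomes visible to the IOMMU; when clearing it, the P bit write is ordered before whatever software does next, and the caller still owes the VT-d invalidation handshake. Hypothetical callers, for illustration only:

static void example_install_context(struct context_entry *context)
{
	/*
	 * Fill in every other field first (translation type, address root,
	 * address width, domain-id, ...); details elided here.
	 */

	/* Publish last: the dma_wmb() inside orders the field writes above */
	context_set_present(context);
}

static void example_withdraw_context(struct context_entry *context)
{
	context_clear_present(context);

	/*
	 * The entry is only fully handed back to software after the
	 * context-cache/IOTLB invalidation handshake recommended by the
	 * VT-d spec (6.5.3.3); perform it before reusing or freeing
	 * anything the entry pointed to.
	 */
}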
···6161 int id;6262 struct iommu_domain *default_domain;6363 struct iommu_domain *blocking_domain;6464+ /*6565+ * During a group device reset, @resetting_domain points to the physical6666+ * domain, while @domain points to the attached domain before the reset.6767+ */6868+ struct iommu_domain *resetting_domain;6469 struct iommu_domain *domain;6570 struct list_head entry;6671 unsigned int owner_cnt;···236231 nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);237232 if (!nb)238233 return -ENOMEM;234234+235235+ iommu_debug_init();239236240237 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {241238 nb[i].notifier_call = iommu_bus_notifier;···668661 }669662670663 if (group->default_domain)671671- iommu_setup_dma_ops(dev);664664+ iommu_setup_dma_ops(dev, group->default_domain);672665673666 mutex_unlock(&group->mutex);674667···11801173 struct device *dev)11811174{11821175 struct iommu_resv_region *entry;11831183- struct list_head mappings;11761176+ LIST_HEAD(mappings);11841177 unsigned long pg_size;11851178 int ret = 0;1186117911871180 pg_size = domain->pgsize_bitmap ? 1UL << __ffs(domain->pgsize_bitmap) : 0;11881188- INIT_LIST_HEAD(&mappings);1189118111901182 if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size))11911183 return -EINVAL;···19551949 return ret;19561950 }19571951 for_each_group_device(group, gdev)19581958- iommu_setup_dma_ops(gdev->dev);19521952+ iommu_setup_dma_ops(gdev->dev, group->default_domain);19591953 mutex_unlock(&group->mutex);1960195419611955 /*···2191218521922186int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)21932187{21942194- if (dev->iommu && dev->iommu->attach_deferred)21952195- return __iommu_attach_device(domain, dev, NULL);21882188+ /*21892189+ * This is called on the dma mapping fast path so avoid locking. This is21902190+ * racy, but we have an expectation that the driver will setup its DMAs21912191+ * inside probe while being single threaded to avoid racing.21922192+ */21932193+ if (!dev->iommu || !dev->iommu->attach_deferred)21942194+ return 0;2196219521972197- return 0;21962196+ guard(mutex)(&dev->iommu_group->mutex);21972197+21982198+ /*21992199+ * This is a concurrent attach during a device reset. Reject it until22002200+ * pci_dev_reset_iommu_done() attaches the device to group->domain.22012201+ *22022202+ * Note that this might fail the iommu_dma_map(). But there's nothing22032203+ * more we can do here.22042204+ */22052205+ if (dev->iommu_group->resetting_domain)22062206+ return -EBUSY;22072207+ return __iommu_attach_device(domain, dev, NULL);21982208}2199220922002210void iommu_detach_device(struct iommu_domain *domain, struct device *dev)···22322210}22332211EXPORT_SYMBOL_GPL(iommu_detach_device);2234221222132213+/**22142214+ * iommu_get_domain_for_dev() - Return the DMA API domain pointer22152215+ * @dev: Device to query22162216+ *22172217+ * This function can be called within a driver bound to dev. 
The returned22182218+ * pointer is valid for the lifetime of the bound driver.22192219+ *22202220+ * It should not be called by drivers with driver_managed_dma = true.22212221+ */22352222struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)22362223{22372224 /* Caller must be a probed driver on dev */···22492218 if (!group)22502219 return NULL;2251222022212221+ lockdep_assert_not_held(&group->mutex);22222222+22522223 return group->domain;22532224}22542225EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);22262226+22272227+/**22282228+ * iommu_driver_get_domain_for_dev() - Return the driver-level domain pointer22292229+ * @dev: Device to query22302230+ *22312231+ * This function can be called by an iommu driver that wants to get the physical22322232+ * domain within an iommu callback function where group->mutex is held.22332233+ */22342234+struct iommu_domain *iommu_driver_get_domain_for_dev(struct device *dev)22352235+{22362236+ struct iommu_group *group = dev->iommu_group;22372237+22382238+ lockdep_assert_held(&group->mutex);22392239+22402240+ /*22412241+ * Driver handles the low-level __iommu_attach_device(), including the22422242+ * one invoked by pci_dev_reset_iommu_done() re-attaching the device to22432243+ * the cached group->domain. In this case, the driver must get the old22442244+ * domain from group->resetting_domain rather than group->domain. This22452245+ * prevents it from re-attaching the device from group->domain (old) to22462246+ * group->domain (new).22472247+ */22482248+ if (group->resetting_domain)22492249+ return group->resetting_domain;22502250+22512251+ return group->domain;22522252+}22532253+EXPORT_SYMBOL_GPL(iommu_driver_get_domain_for_dev);2255225422562255/*22572256 * For IOMMU_DOMAIN_DMA implementations which already provide their own···2434237324352374 if (WARN_ON(!new_domain))24362375 return -EINVAL;23762376+23772377+ /*23782378+ * This is a concurrent attach during a device reset. Reject it until23792379+ * pci_dev_reset_iommu_done() attaches the device to group->domain.23802380+ */23812381+ if (group->resetting_domain)23822382+ return -EBUSY;2437238324382384 /*24392385 * Changing the domain is done by calling attach_dev() on the new···26302562 }2631256326322564 /* unroll mapping in case something went wrong */26332633- if (ret)25652565+ if (ret) {26342566 iommu_unmap(domain, orig_iova, orig_size - size);26352635- else25672567+ } else {26362568 trace_map(orig_iova, orig_paddr, orig_size);25692569+ iommu_debug_map(domain, orig_paddr, orig_size);25702570+ }2637257126382572 return ret;26392573}···2697262726982628 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);2699262926302630+ iommu_debug_unmap_begin(domain, iova, size);26312631+27002632 /*27012633 * Keep iterating until we either unmap 'size' bytes (or more)27022634 * or we hit an area that isn't mapped.···27192647 }2720264827212649 trace_unmap(orig_iova, size, unmapped);26502650+ iommu_debug_unmap_end(domain, orig_iova, size, unmapped);27222651 return unmapped;27232652}27242653···3221314832223149 /* Make sure dma_ops is appropriatley set */32233150 for_each_group_device(group, gdev)32243224- iommu_setup_dma_ops(gdev->dev);31513151+ iommu_setup_dma_ops(gdev->dev, group->default_domain);3225315232263153out_unlock:32273154 mutex_unlock(&group->mutex);···35653492 return -EINVAL;3566349335673494 mutex_lock(&group->mutex);34953495+34963496+ /*34973497+ * This is a concurrent attach during a device reset. 
Reject it until34983498+ * pci_dev_reset_iommu_done() attaches the device to group->domain.34993499+ */35003500+ if (group->resetting_domain) {35013501+ ret = -EBUSY;35023502+ goto out_unlock;35033503+ }35043504+35683505 for_each_group_device(group, device) {35693506 /*35703507 * Skip PASID validation for devices without PASID support···36583575 return -EINVAL;3659357636603577 mutex_lock(&group->mutex);35783578+35793579+ /*35803580+ * This is a concurrent attach during a device reset. Reject it until35813581+ * pci_dev_reset_iommu_done() attaches the device to group->domain.35823582+ */35833583+ if (group->resetting_domain) {35843584+ ret = -EBUSY;35853585+ goto out_unlock;35863586+ }35873587+36613588 entry = iommu_make_pasid_array_entry(domain, handle);36623589 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL,36633590 XA_ZERO_ENTRY, GFP_KERNEL);···39243831 return ret;39253832}39263833EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");38343834+38353835+/**38363836+ * pci_dev_reset_iommu_prepare() - Block IOMMU to prepare for a PCI device reset38373837+ * @pdev: PCI device that is going to enter a reset routine38383838+ *38393839+ * The PCIe r6.0, sec 10.3.1 IMPLEMENTATION NOTE recommends to disable and block38403840+ * ATS before initiating a reset. This means that a PCIe device during the reset38413841+ * routine wants to block any IOMMU activity: translation and ATS invalidation.38423842+ *38433843+ * This function attaches the device's RID/PASID(s) the group->blocking_domain,38443844+ * setting the group->resetting_domain. This allows the IOMMU driver pausing any38453845+ * IOMMU activity while leaving the group->domain pointer intact. Later when the38463846+ * reset is finished, pci_dev_reset_iommu_done() can restore everything.38473847+ *38483848+ * Caller must use pci_dev_reset_iommu_prepare() with pci_dev_reset_iommu_done()38493849+ * before/after the core-level reset routine, to unset the resetting_domain.38503850+ *38513851+ * Return: 0 on success or negative error code if the preparation failed.38523852+ *38533853+ * These two functions are designed to be used by PCI reset functions that would38543854+ * not invoke any racy iommu_release_device(), since PCI sysfs node gets removed38553855+ * before it notifies with a BUS_NOTIFY_REMOVED_DEVICE. 
When using them in any other38563856+ case, callers must ensure there will be no racy iommu_release_device() call,38573857+ which otherwise would UAF the dev->iommu_group pointer.38583858+ */38593859+int pci_dev_reset_iommu_prepare(struct pci_dev *pdev)38603860+{38613861+ struct iommu_group *group = pdev->dev.iommu_group;38623862+ unsigned long pasid;38633863+ void *entry;38643864+ int ret;38653865+38663866+ if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev))38673867+ return 0;38683868+38693869+ guard(mutex)(&group->mutex);38703870+38713871+ /* Re-entry is not allowed */38723872+ if (WARN_ON(group->resetting_domain))38733873+ return -EBUSY;38743874+38753875+ ret = __iommu_group_alloc_blocking_domain(group);38763876+ if (ret)38773877+ return ret;38783878+38793879+ /* Stage RID domain at blocking_domain while retaining group->domain */38803880+ if (group->domain != group->blocking_domain) {38813881+ ret = __iommu_attach_device(group->blocking_domain, &pdev->dev,38823882+ group->domain);38833883+ if (ret)38843884+ return ret;38853885+ }38863886+38873887+ /*38883888+ * Stage PASID domains at blocking_domain while retaining pasid_array.38893889+ *38903890+ * The pasid_array is mostly fenced by group->mutex, except one reader38913891+ * in iommu_attach_handle_get(), so it's safe to read without xa_lock.38923892+ */38933893+ xa_for_each_start(&group->pasid_array, pasid, entry, 1)38943894+ iommu_remove_dev_pasid(&pdev->dev, pasid,38953895+ pasid_array_entry_to_domain(entry));38963896+38973897+ group->resetting_domain = group->blocking_domain;38983898+ return ret;38993899+}39003900+EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare);39013901+39023902+/**39033903+ * pci_dev_reset_iommu_done() - Restore IOMMU after a PCI device reset is done39043904+ * @pdev: PCI device that has finished a reset routine39053905+ *39063906+ * After a PCIe device finishes a reset routine, it wants to restore its39073907+ * IOMMU activity, including new translation as well as cache invalidation, by39083908+ * re-attaching all RID/PASIDs of the device back to the domains retained in39093909+ * the core-level structure.39103910+ *39113911+ * The caller must pair it with a successful pci_dev_reset_iommu_prepare().39123912+ *39133913+ * Note that, although unlikely, there is a risk that re-attaching domains might39143914+ * fail due to an unexpected condition such as OOM.39153915+ */39163916+void pci_dev_reset_iommu_done(struct pci_dev *pdev)39173917+{39183918+ struct iommu_group *group = pdev->dev.iommu_group;39193919+ unsigned long pasid;39203920+ void *entry;39213921+39223922+ if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev))39233923+ return;39243924+39253925+ guard(mutex)(&group->mutex);39263926+39273927+ /* pci_dev_reset_iommu_prepare() was bypassed for the device */39283928+ if (!group->resetting_domain)39293929+ return;39303930+39313931+ /* pci_dev_reset_iommu_prepare() was not successfully called */39323932+ if (WARN_ON(!group->blocking_domain))39333933+ return;39343934+39353935+ /* Re-attach RID domain back to group->domain */39363936+ if (group->domain != group->blocking_domain) {39373937+ WARN_ON(__iommu_attach_device(group->domain, &pdev->dev,39383938+ group->blocking_domain));39393939+ }39403940+39413941+ /*39423942+ * Re-attach PASID domains back to the domains retained in pasid_array.39433943+ *39443944+ * The pasid_array is mostly fenced by group->mutex, except one reader39453945+ * in iommu_attach_handle_get(), so it's safe to read without xa_lock.39463946+ */39473947+ 
xa_for_each_start(&group->pasid_array, pasid, entry, 1)39483948+ WARN_ON(__iommu_set_group_pasid(39493949+ pasid_array_entry_to_domain(entry), group, pasid,39503950+ group->blocking_domain));39513951+39523952+ group->resetting_domain = NULL;39533953+}39543954+EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_done);3927395539283956#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)39293957/**
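The kernel-doc for iommu_driver_get_domain_for_dev() above describes how a driver should pick up the "old" domain while group->mutex is held, including during a reset when group->resetting_domain is set. A minimal sketch of the intended driver-side use; the driver callback and the two hardware helpers are hypothetical, only iommu_driver_get_domain_for_dev() and the attach_dev() prototype come from the code above:

/*
 * Hypothetical driver attach path. Inside an iommu_ops callback the group
 * mutex is already held, so iommu_driver_get_domain_for_dev() is the correct
 * way to find the currently attached domain; during a PCI reset it returns
 * group->resetting_domain (the blocking domain) instead of group->domain.
 */
static int my_iommu_attach_dev(struct iommu_domain *new_domain,
			       struct device *dev)
{
	struct iommu_domain *old_domain = iommu_driver_get_domain_for_dev(dev);

	if (old_domain == new_domain)
		return 0;	/* e.g. re-attach to group->domain after a reset */

	my_iommu_detach_hw(dev, old_domain);		/* hypothetical HW teardown */
	return my_iommu_attach_hw(dev, new_domain);	/* hypothetical HW setup */
}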
+13-4
drivers/pci/pci-acpi.c
···991010#include <linux/delay.h>1111#include <linux/init.h>1212+#include <linux/iommu.h>1213#include <linux/irqdomain.h>1314#include <linux/pci.h>1415#include <linux/msi.h>···972971int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)973972{974973 acpi_handle handle = ACPI_HANDLE(&dev->dev);974974+ int ret;975975976976 if (!handle || !acpi_has_method(handle, "_RST"))977977 return -ENOTTY;···980978 if (probe)981979 return 0;982980983983- if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) {984984- pci_warn(dev, "ACPI _RST failed\n");985985- return -ENOTTY;981981+ ret = pci_dev_reset_iommu_prepare(dev);982982+ if (ret) {983983+ pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);984984+ return ret;986985 }987986988988- return 0;987987+ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) {988988+ pci_warn(dev, "ACPI _RST failed\n");989989+ ret = -ENOTTY;990990+ }991991+992992+ pci_dev_reset_iommu_done(dev);993993+ return ret;989994}990995991996bool acpi_pci_power_manageable(struct pci_dev *dev)
+58-7
drivers/pci/pci.c
···1313#include <linux/delay.h>1414#include <linux/dmi.h>1515#include <linux/init.h>1616+#include <linux/iommu.h>1617#include <linux/msi.h>1718#include <linux/of.h>1819#include <linux/pci.h>···2625#include <linux/logic_pio.h>2726#include <linux/device.h>2827#include <linux/pm_runtime.h>2828+#include <linux/pci-ats.h>2929#include <linux/pci_hotplug.h>3030#include <linux/vmalloc.h>3131#include <asm/dma.h>···43324330 */43334331int pcie_flr(struct pci_dev *dev)43344332{43334333+ int ret;43344334+43354335 if (!pci_wait_for_pending_transaction(dev))43364336 pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n");43374337+43384338+ /* Have to call it after waiting for pending DMA transaction */43394339+ ret = pci_dev_reset_iommu_prepare(dev);43404340+ if (ret) {43414341+ pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);43424342+ return ret;43434343+ }4337434443384345 pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);4339434643404347 if (dev->imm_ready)43414341- return 0;43484348+ goto done;4342434943434350 /*43444351 * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within···43564345 */43574346 msleep(100);4358434743594359- return pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);43484348+ ret = pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);43494349+done:43504350+ pci_dev_reset_iommu_done(dev);43514351+ return ret;43604352}43614353EXPORT_SYMBOL_GPL(pcie_flr);43624354···4387437343884374static int pci_af_flr(struct pci_dev *dev, bool probe)43894375{43764376+ int ret;43904377 int pos;43914378 u8 cap;43924379···44144399 PCI_AF_STATUS_TP << 8))44154400 pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n");4416440144024402+ /* Have to call it after waiting for pending DMA transaction */44034403+ ret = pci_dev_reset_iommu_prepare(dev);44044404+ if (ret) {44054405+ pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);44064406+ return ret;44074407+ }44084408+44174409 pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);4418441044194411 if (dev->imm_ready)44204420- return 0;44124412+ goto done;4421441344224414 /*44234415 * Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006,···44344412 */44354413 msleep(100);4436441444374437- return pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);44154415+ ret = pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);44164416+done:44174417+ pci_dev_reset_iommu_done(dev);44184418+ return ret;44384419}4439442044404421/**···44584433static int pci_pm_reset(struct pci_dev *dev, bool probe)44594434{44604435 u16 csr;44364436+ int ret;4461443744624438 if (!dev->pm_cap || dev->dev_flags & PCI_DEV_FLAGS_NO_PM_RESET)44634439 return -ENOTTY;···44734447 if (dev->current_state != PCI_D0)44744448 return -EINVAL;4475444944504450+ ret = pci_dev_reset_iommu_prepare(dev);44514451+ if (ret) {44524452+ pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);44534453+ return ret;44544454+ }44554455+44764456 csr &= ~PCI_PM_CTRL_STATE_MASK;44774457 csr |= PCI_D3hot;44784458 pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);···44894457 pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);44904458 pci_dev_d3_sleep(dev);4491445944924492- return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);44604460+ ret = pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);44614461+ pci_dev_reset_iommu_done(dev);44624462+ return ret;44934463}4494446444954465/**···49194885 return 
-ENOTTY;49204886 }4921488748884888+ rc = pci_dev_reset_iommu_prepare(dev);48894889+ if (rc) {48904890+ pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", rc);48914891+ return rc;48924892+ }48934893+49224894 rc = pci_dev_reset_slot_function(dev, probe);49234895 if (rc != -ENOTTY)49244924- return rc;49254925- return pci_parent_bus_reset(dev, probe);48964896+ goto done;48974897+48984898+ rc = pci_parent_bus_reset(dev, probe);48994899+done:49004900+ pci_dev_reset_iommu_done(dev);49014901+ return rc;49264902}4927490349284904static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)···49564912 if (rc)49574913 return -ENOTTY;4958491449154915+ rc = pci_dev_reset_iommu_prepare(dev);49164916+ if (rc) {49174917+ pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", rc);49184918+ return rc;49194919+ }49204920+49594921 if (reg & PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR) {49604922 val = reg;49614923 } else {···49764926 pci_write_config_word(bridge, dvsec + PCI_DVSEC_CXL_PORT_CTL,49774927 reg);4978492849294929+ pci_dev_reset_iommu_done(dev);49794930 return rc;49804931}49814932
+18-1
drivers/pci/quirks.c
···2121#include <linux/pci.h>2222#include <linux/isa-dma.h> /* isa_dma_bridge_buggy */2323#include <linux/init.h>2424+#include <linux/iommu.h>2425#include <linux/delay.h>2526#include <linux/acpi.h>2627#include <linux/dmi.h>···42294228 { 0 }42304229};4231423042314231+static int __pci_dev_specific_reset(struct pci_dev *dev, bool probe,42324232+ const struct pci_dev_reset_methods *i)42334233+{42344234+ int ret;42354235+42364236+ ret = pci_dev_reset_iommu_prepare(dev);42374237+ if (ret) {42384238+ pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);42394239+ return ret;42404240+ }42414241+42424242+ ret = i->reset(dev, probe);42434243+ pci_dev_reset_iommu_done(dev);42444244+ return ret;42454245+}42464246+42324247/*42334248 * These device-specific reset methods are here rather than in a driver42344249 * because when a host assigns a device to a guest VM, the host may need···42594242 i->vendor == (u16)PCI_ANY_ID) &&42604243 (i->device == dev->device ||42614244 i->device == (u16)PCI_ANY_ID))42624262- return i->reset(dev, probe);42454245+ return __pci_dev_specific_reset(dev, probe, i);42634246 }4264424742654248 return -ENOTTY;
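All of the reset paths above (FLR, AF FLR, PM reset, ACPI _RST, bus/slot reset, and the device-specific quirk methods) follow the same calling convention. A condensed sketch of that pattern, assuming a placeholder do_hw_reset() that stands in for the actual reset mechanism:

/*
 * Condensed pattern used by the PCI reset paths above: prepare only after
 * pending DMA has drained, and always call pci_dev_reset_iommu_done() even if
 * the reset itself fails, so RID/PASID domains are re-attached.
 */
static int reset_with_iommu_blocked(struct pci_dev *pdev)
{
	int ret;

	ret = pci_dev_reset_iommu_prepare(pdev);
	if (ret) {
		pci_err(pdev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
		return ret;
	}

	ret = do_hw_reset(pdev);	/* placeholder for FLR/_RST/quirk method */

	pci_dev_reset_iommu_done(pdev);	/* re-attach RID/PASID domains */
	return ret;
}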
···466466};467467468468/**469469+ * struct iommu_hwpt_amd_guest - AMD IOMMU guest I/O page table data470470+ * (IOMMU_HWPT_DATA_AMD_GUEST)471471+ * @dte: Guest Device Table Entry (DTE)472472+ */473473+struct iommu_hwpt_amd_guest {474474+ __aligned_u64 dte[4];475475+};476476+477477+/**469478 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type470479 * @IOMMU_HWPT_DATA_NONE: no data471480 * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table472481 * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table482482+ * @IOMMU_HWPT_DATA_AMD_GUEST: AMD IOMMU guest page table473483 */474484enum iommu_hwpt_data_type {475485 IOMMU_HWPT_DATA_NONE = 0,476486 IOMMU_HWPT_DATA_VTD_S1 = 1,477487 IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,488488+ IOMMU_HWPT_DATA_AMD_GUEST = 3,478489};479490480491/**···635624};636625637626/**627627+ * struct iommu_hw_info_amd - AMD IOMMU device info628628+ *629629+ * @efr : Value of AMD IOMMU Extended Feature Register (EFR)630630+ * @efr2: Value of AMD IOMMU Extended Feature 2 Register (EFR2)631631+ *632632+ * Please see the description of these registers in the following sections of633633+ * the AMD I/O Virtualization Technology (IOMMU) Specification.634634+ * (https://docs.amd.com/v/u/en-US/48882_3.10_PUB)635635+ *636636+ * - MMIO Offset 0030h IOMMU Extended Feature Register637637+ * - MMIO Offset 01A0h IOMMU Extended Feature 2 Register638638+ *639639+ * Note: The EFR and EFR2 are raw values reported by hardware.640640+ * The VMM is responsible for determining the appropriate flags to expose to641641+ * the VM, since certain features are not currently supported by the kernel642642+ * for HW-vIOMMU.643643+ *644644+ * The current VMM-allowed list of feature flags is:645645+ * - EFR[GTSup, GASup, GioSup, PPRSup, EPHSup, GATS, GLX, PASmax]646646+ */647647+struct iommu_hw_info_amd {648648+ __aligned_u64 efr;649649+ __aligned_u64 efr2;650650+};651651+652652+/**638653 * enum iommu_hw_info_type - IOMMU Hardware Info Types639654 * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware640655 * info···669632 * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type670633 * @IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM671634 * SMMUv3) info type635635+ * @IOMMU_HW_INFO_TYPE_AMD: AMD IOMMU info type672636 */673637enum iommu_hw_info_type {674638 IOMMU_HW_INFO_TYPE_NONE = 0,···677639 IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,678640 IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,679641 IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3,642642+ IOMMU_HW_INFO_TYPE_AMD = 4,680643};681644682645/**
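To illustrate the note in the struct iommu_hw_info_amd kernel-doc: a VMM would typically obtain this structure through the iommufd hardware-info query and then sanitize the raw EFR/EFR2 values before building the guest's virtual IOMMU. A hedged userspace sketch; the two ALLOWED masks are placeholders that a VMM would derive from the spec bits listed above, not values defined by this series:

#include <stdint.h>
#include <linux/iommufd.h>	/* struct iommu_hw_info_amd */

/*
 * Mask the raw EFR/EFR2 reported by the kernel down to the VMM-allowed
 * feature list (GTSup, GASup, GioSup, PPRSup, EPHSup, GATS, GLX, PASmax).
 * AMD_VIOMMU_EFR_ALLOWED and AMD_VIOMMU_EFR2_ALLOWED are hypothetical masks.
 */
static void amd_viommu_build_guest_efr(const struct iommu_hw_info_amd *info,
				       uint64_t *guest_efr, uint64_t *guest_efr2)
{
	*guest_efr  = info->efr  & AMD_VIOMMU_EFR_ALLOWED;
	*guest_efr2 = info->efr2 & AMD_VIOMMU_EFR2_ALLOWED;
}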
+4
include/uapi/linux/vfio.h
···964964 * hwpt corresponding to the given pt_id.965965 *966966 * Return: 0 on success, -errno on failure.967967+ *968968+ * When a device is resetting, -EBUSY will be returned to reject any concurrent969969+ * attachment to the resetting device itself or any sibling device in the same970970+ * IOMMU group as the resetting device.967971 */968972struct vfio_device_attach_iommufd_pt {969973 __u32 argsz;
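From userspace's point of view the new -EBUSY is transient: the attach can simply be retried once the reset completes. A rough sketch, assuming the usual cdev-based attach ioctl; the retry policy and field layout beyond argsz are the caller's responsibility and real code would bound the retries:

#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Retry the attach while the device (or a sibling in its group) is resetting. */
static int attach_hwpt_retry(int device_fd, uint32_t hwpt_id)
{
	struct vfio_device_attach_iommufd_pt attach = {
		.argsz = sizeof(attach),
		.pt_id = hwpt_id,
	};

	while (ioctl(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach)) {
		if (errno != EBUSY)
			return -errno;
		usleep(1000);	/* device is resetting; try again shortly */
	}
	return 0;
}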
+27
mm/page_ext.c
···1111#include <linux/page_table_check.h>1212#include <linux/rcupdate.h>1313#include <linux/pgalloc_tag.h>1414+#include <linux/iommu-debug-pagealloc.h>14151516/*1617 * struct page extension···8988#endif9089#ifdef CONFIG_PAGE_TABLE_CHECK9190 &page_table_check_ops,9191+#endif9292+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC9393+ &page_iommu_debug_ops,9294#endif9395};9496···536532 }537533538534 return page_ext;535535+}536536+537537+/**538538+ * page_ext_from_phys() - Get the page_ext structure for a physical address.539539+ * @phys: The physical address to query.540540+ *541541+ * This function safely gets the `struct page_ext` associated with a given542542+ * physical address. It performs validation to ensure the address corresponds543543+ * to a valid, online struct page before attempting to access it.544544+ * It returns NULL for MMIO, ZONE_DEVICE, holes and offline memory.545545+ *546546+ * Return: NULL if no page_ext exists for this physical address.547547+ * Context: Any context. Caller may not sleep until they have called548548+ * page_ext_put().549549+ */550550+struct page_ext *page_ext_from_phys(phys_addr_t phys)551551+{552552+ struct page *page = pfn_to_online_page(__phys_to_pfn(phys));553553+554554+ if (!page)555555+ return NULL;556556+557557+ return page_ext_get(page);539558}540559541560/**
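The kernel-doc for page_ext_from_phys() spells out the calling contract (NULL for MMIO/holes/offline memory, and page_ext_put() before sleeping). A minimal sketch of the expected consumer, assuming a hypothetical per-page marking step in the IOMMU sanitizer:

#include <linux/page_ext.h>

/*
 * Hedged sketch: look up the page_ext for a physical address about to be
 * IOMMU-mapped, skip addresses that have no struct page, and drop the
 * reference when done. The actual per-page state update is hypothetical.
 */
static void mark_phys_as_iommu_mapped(phys_addr_t phys)
{
	struct page_ext *page_ext = page_ext_from_phys(phys);

	if (!page_ext)		/* MMIO, ZONE_DEVICE, hole or offline memory */
		return;

	/* ... update the sanitizer's per-page state here ... */

	page_ext_put(page_ext);	/* caller may not sleep before this */
}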
···11+// SPDX-License-Identifier: GPL-2.022+33+//! Rust support related to IOMMU.44+55+pub mod pgtable;
+279
rust/kernel/iommu/pgtable.rs
···11+// SPDX-License-Identifier: GPL-2.022+33+//! IOMMU page table management.44+//!55+//! C header: [`include/linux/io-pgtable.h`](srctree/include/linux/io-pgtable.h)66+77+use core::{88+ marker::PhantomData,99+ ptr::NonNull, //1010+};1111+1212+use crate::{1313+ alloc,1414+ bindings,1515+ device::{1616+ Bound,1717+ Device, //1818+ },1919+ devres::Devres,2020+ error::to_result,2121+ io::PhysAddr,2222+ prelude::*, //2323+};2424+2525+use bindings::io_pgtable_fmt;2626+2727+/// Protection flags used with IOMMU mappings.2828+pub mod prot {2929+ /// Read access.3030+ pub const READ: u32 = bindings::IOMMU_READ;3131+ /// Write access.3232+ pub const WRITE: u32 = bindings::IOMMU_WRITE;3333+ /// Request cache coherency.3434+ pub const CACHE: u32 = bindings::IOMMU_CACHE;3535+ /// Request no-execute permission.3636+ pub const NOEXEC: u32 = bindings::IOMMU_NOEXEC;3737+ /// MMIO peripheral mapping.3838+ pub const MMIO: u32 = bindings::IOMMU_MMIO;3939+ /// Privileged mapping.4040+ pub const PRIVILEGED: u32 = bindings::IOMMU_PRIV;4141+}4242+4343+/// Represents a requested `io_pgtable` configuration.4444+pub struct Config {4545+ /// Quirk bitmask (type-specific).4646+ pub quirks: usize,4747+ /// Valid page sizes, as a bitmask of powers of two.4848+ pub pgsize_bitmap: usize,4949+ /// Input address space size in bits.5050+ pub ias: u32,5151+ /// Output address space size in bits.5252+ pub oas: u32,5353+ /// IOMMU uses coherent accesses for page table walks.5454+ pub coherent_walk: bool,5555+}5656+5757+/// An io page table using a specific format.5858+///5959+/// # Invariants6060+///6161+/// The pointer references a valid io page table.6262+pub struct IoPageTable<F: IoPageTableFmt> {6363+ ptr: NonNull<bindings::io_pgtable_ops>,6464+ _marker: PhantomData<F>,6565+}6666+6767+// SAFETY: `struct io_pgtable_ops` is not restricted to a single thread.6868+unsafe impl<F: IoPageTableFmt> Send for IoPageTable<F> {}6969+// SAFETY: `struct io_pgtable_ops` may be accessed concurrently.7070+unsafe impl<F: IoPageTableFmt> Sync for IoPageTable<F> {}7171+7272+/// The format used by this page table.7373+pub trait IoPageTableFmt: 'static {7474+ /// The value representing this format.7575+ const FORMAT: io_pgtable_fmt;7676+}7777+7878+impl<F: IoPageTableFmt> IoPageTable<F> {7979+ /// Create a new `IoPageTable` as a device resource.8080+ #[inline]8181+ pub fn new(8282+ dev: &Device<Bound>,8383+ config: Config,8484+ ) -> impl PinInit<Devres<IoPageTable<F>>, Error> + '_ {8585+ // SAFETY: Devres ensures that the value is dropped during device unbind.8686+ Devres::new(dev, unsafe { Self::new_raw(dev, config) })8787+ }8888+8989+ /// Create a new `IoPageTable`.9090+ ///9191+ /// # Safety9292+ ///9393+ /// If successful, then the returned `IoPageTable` must be dropped before the device is9494+ /// unbound.9595+ #[inline]9696+ pub unsafe fn new_raw(dev: &Device<Bound>, config: Config) -> Result<IoPageTable<F>> {9797+ let mut raw_cfg = bindings::io_pgtable_cfg {9898+ quirks: config.quirks,9999+ pgsize_bitmap: config.pgsize_bitmap,100100+ ias: config.ias,101101+ oas: config.oas,102102+ coherent_walk: config.coherent_walk,103103+ tlb: &raw const NOOP_FLUSH_OPS,104104+ iommu_dev: dev.as_raw(),105105+ // SAFETY: All zeroes is a valid value for `struct io_pgtable_cfg`.106106+ ..unsafe { core::mem::zeroed() }107107+ };108108+109109+ // SAFETY:110110+ // * The raw_cfg pointer is valid for the duration of this call.111111+ // * The provided `FLUSH_OPS` contains valid function pointers that accept a null pointer112112+ // as cookie.113113+ // * The 
caller ensures that the io pgtable does not outlive the device.114114+ let ops = unsafe {115115+ bindings::alloc_io_pgtable_ops(F::FORMAT, &mut raw_cfg, core::ptr::null_mut())116116+ };117117+118118+ // INVARIANT: We successfully created a valid page table.119119+ Ok(IoPageTable {120120+ ptr: NonNull::new(ops).ok_or(ENOMEM)?,121121+ _marker: PhantomData,122122+ })123123+ }124124+125125+ /// Obtain a raw pointer to the underlying `struct io_pgtable_ops`.126126+ #[inline]127127+ pub fn raw_ops(&self) -> *mut bindings::io_pgtable_ops {128128+ self.ptr.as_ptr()129129+ }130130+131131+ /// Obtain a raw pointer to the underlying `struct io_pgtable`.132132+ #[inline]133133+ pub fn raw_pgtable(&self) -> *mut bindings::io_pgtable {134134+ // SAFETY: The io_pgtable_ops of an io-pgtable is always the ops field of a io_pgtable.135135+ unsafe { kernel::container_of!(self.raw_ops(), bindings::io_pgtable, ops) }136136+ }137137+138138+ /// Obtain a raw pointer to the underlying `struct io_pgtable_cfg`.139139+ #[inline]140140+ pub fn raw_cfg(&self) -> *mut bindings::io_pgtable_cfg {141141+ // SAFETY: The `raw_pgtable()` method returns a valid pointer.142142+ unsafe { &raw mut (*self.raw_pgtable()).cfg }143143+ }144144+145145+ /// Map a physically contiguous range of pages of the same size.146146+ ///147147+ /// Even if successful, this operation may not map the entire range. In that case, only a148148+ /// prefix of the range is mapped, and the returned integer indicates its length in bytes. In149149+ /// this case, the caller will usually call `map_pages` again for the remaining range.150150+ ///151151+ /// The returned [`Result`] indicates whether an error was encountered while mapping pages.152152+ /// Note that this may return a non-zero length even if an error was encountered. The caller153153+ /// will usually [unmap the relevant pages](Self::unmap_pages) on error.154154+ ///155155+ /// The caller must flush the TLB before using the pgtable to access the newly created mapping.156156+ ///157157+ /// # Safety158158+ ///159159+ /// * No other io-pgtable operation may access the range `iova .. 
iova+pgsize*pgcount` while160160+ /// this `map_pages` operation executes.161161+ /// * This page table must not contain any mapping that overlaps with the mapping created by162162+ /// this call.163163+ /// * If this page table is live, then the caller must ensure that it's okay to access the164164+ /// physical address being mapped for the duration in which it is mapped.165165+ #[inline]166166+ pub unsafe fn map_pages(167167+ &self,168168+ iova: usize,169169+ paddr: PhysAddr,170170+ pgsize: usize,171171+ pgcount: usize,172172+ prot: u32,173173+ flags: alloc::Flags,174174+ ) -> (usize, Result) {175175+ let mut mapped: usize = 0;176176+177177+ // SAFETY: The `map_pages` function in `io_pgtable_ops` is never null.178178+ let map_pages = unsafe { (*self.raw_ops()).map_pages.unwrap_unchecked() };179179+180180+ // SAFETY: The safety requirements of this method are sufficient to call `map_pages`.181181+ let ret = to_result(unsafe {182182+ (map_pages)(183183+ self.raw_ops(),184184+ iova,185185+ paddr,186186+ pgsize,187187+ pgcount,188188+ prot as i32,189189+ flags.as_raw(),190190+ &mut mapped,191191+ )192192+ });193193+194194+ (mapped, ret)195195+ }196196+197197+ /// Unmap a range of virtually contiguous pages of the same size.198198+ ///199199+ /// This may not unmap the entire range, and returns the length of the unmapped prefix in200200+ /// bytes.201201+ ///202202+ /// # Safety203203+ ///204204+ /// * No other io-pgtable operation may access the range `iova .. iova+pgsize*pgcount` while205205+ /// this `unmap_pages` operation executes.206206+ /// * This page table must contain one or more consecutive mappings starting at `iova` whose207207+ /// total size is `pgcount * pgsize`.208208+ #[inline]209209+ #[must_use]210210+ pub unsafe fn unmap_pages(&self, iova: usize, pgsize: usize, pgcount: usize) -> usize {211211+ // SAFETY: The `unmap_pages` function in `io_pgtable_ops` is never null.212212+ let unmap_pages = unsafe { (*self.raw_ops()).unmap_pages.unwrap_unchecked() };213213+214214+ // SAFETY: The safety requirements of this method are sufficient to call `unmap_pages`.215215+ unsafe { (unmap_pages)(self.raw_ops(), iova, pgsize, pgcount, core::ptr::null_mut()) }216216+ }217217+}218218+219219+// For the initial users of these rust bindings, the GPU FW is managing the IOTLB and performs all220220+// required invalidations using a range. 
There is no need for it to get ARM style invalidation221221+// instructions from the page table code.222222+//223223+// Support for flushing the TLB with ARM style invalidation instructions may be added in the224224+// future.225225+static NOOP_FLUSH_OPS: bindings::iommu_flush_ops = bindings::iommu_flush_ops {226226+ tlb_flush_all: Some(rust_tlb_flush_all_noop),227227+ tlb_flush_walk: Some(rust_tlb_flush_walk_noop),228228+ tlb_add_page: None,229229+};230230+231231+#[no_mangle]232232+extern "C" fn rust_tlb_flush_all_noop(_cookie: *mut core::ffi::c_void) {}233233+234234+#[no_mangle]235235+extern "C" fn rust_tlb_flush_walk_noop(236236+ _iova: usize,237237+ _size: usize,238238+ _granule: usize,239239+ _cookie: *mut core::ffi::c_void,240240+) {241241+}242242+243243+impl<F: IoPageTableFmt> Drop for IoPageTable<F> {244244+ fn drop(&mut self) {245245+ // SAFETY: The caller of `Self::ttbr()` promised that the page table is not live when this246246+ // destructor runs.247247+ unsafe { bindings::free_io_pgtable_ops(self.raw_ops()) };248248+ }249249+}250250+251251+/// The `ARM_64_LPAE_S1` page table format.252252+pub enum ARM64LPAES1 {}253253+254254+impl IoPageTableFmt for ARM64LPAES1 {255255+ const FORMAT: io_pgtable_fmt = bindings::io_pgtable_fmt_ARM_64_LPAE_S1 as io_pgtable_fmt;256256+}257257+258258+impl IoPageTable<ARM64LPAES1> {259259+ /// Access the `ttbr` field of the configuration.260260+ ///261261+ /// This is the physical address of the page table, which may be passed to the device that262262+ /// needs to use it.263263+ ///264264+ /// # Safety265265+ ///266266+ /// The caller must ensure that the device stops using the page table before dropping it.267267+ #[inline]268268+ pub unsafe fn ttbr(&self) -> u64 {269269+ // SAFETY: `arm_lpae_s1_cfg` is the right cfg type for `ARM64LPAES1`.270270+ unsafe { (*self.raw_cfg()).__bindgen_anon_1.arm_lpae_s1_cfg.ttbr }271271+ }272272+273273+ /// Access the `mair` field of the configuration.274274+ #[inline]275275+ pub fn mair(&self) -> u64 {276276+ // SAFETY: `arm_lpae_s1_cfg` is the right cfg type for `ARM64LPAES1`.277277+ unsafe { (*self.raw_cfg()).__bindgen_anon_1.arm_lpae_s1_cfg.mair }278278+ }279279+}
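For reference, the C io-pgtable sequence that IoPageTable wraps looks roughly like the sketch below. It is not taken from this series; the function name, the ias/oas/page-size values and the example addresses are illustrative only, while the types and calls come from include/linux/io-pgtable.h:

#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/sizes.h>

/*
 * Condensed C equivalent of IoPageTable::new_raw() plus one map_pages() call.
 * After alloc_io_pgtable_ops() fills the config, the TTBR to program into the
 * device is cfg.arm_lpae_s1_cfg.ttbr (what IoPageTable::ttbr() exposes).
 */
static struct io_pgtable_ops *example_alloc_pgtable(struct device *dev,
					const struct iommu_flush_ops *tlb)
{
	struct io_pgtable_cfg cfg = {
		.pgsize_bitmap	= SZ_4K | SZ_2M,	/* illustrative */
		.ias		= 48,
		.oas		= 48,
		.coherent_walk	= true,
		.tlb		= tlb,
		.iommu_dev	= dev,
	};
	struct io_pgtable_ops *ops;
	size_t mapped = 0;

	ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &cfg, NULL);
	if (!ops)
		return NULL;

	/* Map a single 4K page at an example IOVA/PA; caller flushes the TLB. */
	WARN_ON(ops->map_pages(ops, 0x1000, 0x40000000, SZ_4K, 1,
			       IOMMU_READ | IOMMU_WRITE, GFP_KERNEL, &mapped));
	return ops;
}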