···
 #include <linux/pci.h>
 #include <linux/pci-ats.h>
 #include <linux/platform_device.h>
+#include <linux/sort.h>
 #include <linux/string_choices.h>
 #include <kunit/visibility.h>
 #include <uapi/linux/iommufd.h>
···
 };
 
 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
+static bool arm_smmu_ats_supported(struct arm_smmu_master *master);
 
 static void parse_driver_options(struct arm_smmu_device *smmu)
 {
···
  */
 }
 
-/* Context descriptor manipulation functions */
-void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
+/* Invalidation array manipulation functions */
+static inline struct arm_smmu_inv *
+arm_smmu_invs_iter_next(struct arm_smmu_invs *invs, size_t next, size_t *idx)
 {
-	struct arm_smmu_cmdq_ent cmd = {
-		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
-			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
-		.tlbi.asid = asid,
-	};
-
-	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+	while (true) {
+		if (next >= invs->num_invs) {
+			*idx = next;
+			return NULL;
+		}
+		if (!READ_ONCE(invs->inv[next].users)) {
+			next++;
+			continue;
+		}
+		*idx = next;
+		return &invs->inv[next];
+	}
 }
+
+/**
+ * arm_smmu_invs_for_each_entry - Iterate over all non-trash entries in invs
+ * @invs: the base invalidation array
+ * @idx: a stack variable of 'size_t', to store the array index
+ * @cur: a stack variable of 'struct arm_smmu_inv *'
+ */
+#define arm_smmu_invs_for_each_entry(invs, idx, cur)                          \
+	for (cur = arm_smmu_invs_iter_next(invs, 0, &(idx)); cur;             \
+	     cur = arm_smmu_invs_iter_next(invs, idx + 1, &(idx)))
+
+static int arm_smmu_inv_cmp(const struct arm_smmu_inv *inv_l,
+			    const struct arm_smmu_inv *inv_r)
+{
+	if (inv_l->smmu != inv_r->smmu)
+		return cmp_int((uintptr_t)inv_l->smmu, (uintptr_t)inv_r->smmu);
+	if (inv_l->type != inv_r->type)
+		return cmp_int(inv_l->type, inv_r->type);
+	if (inv_l->id != inv_r->id)
+		return cmp_int(inv_l->id, inv_r->id);
+	if (arm_smmu_inv_is_ats(inv_l))
+		return cmp_int(inv_l->ssid, inv_r->ssid);
+	return 0;
+}
+
+static inline int arm_smmu_invs_iter_next_cmp(struct arm_smmu_invs *invs_l,
+					      size_t next_l, size_t *idx_l,
+					      struct arm_smmu_invs *invs_r,
+					      size_t next_r, size_t *idx_r)
+{
+	struct arm_smmu_inv *cur_l =
+		arm_smmu_invs_iter_next(invs_l, next_l, idx_l);
+
+	/*
+	 * idx_r has to be updated manually, because invs_r cannot go through
+	 * arm_smmu_invs_iter_next(): invs_r never sets any users counter.
+	 */
+	*idx_r = next_r;
+
+	/*
+	 * Compare the items of two sorted arrays. If one side has run past the
+	 * end of its array, return the other side to let it finish the
+	 * iteration.
+	 *
+	 * If the left entry is empty, return 1 to pick the right entry.
+	 * If the right entry is empty, return -1 to pick the left entry.
+	 */
+	if (!cur_l)
+		return 1;
+	if (next_r >= invs_r->num_invs)
+		return -1;
+	return arm_smmu_inv_cmp(cur_l, &invs_r->inv[next_r]);
+}
+
+/**
+ * arm_smmu_invs_for_each_cmp - Iterate over two sorted arrays computing for
+ *                              arm_smmu_invs_merge() or arm_smmu_invs_unref()
+ * @invs_l: the base invalidation array
+ * @idx_l: a stack variable of 'size_t', to store the base array index
+ * @invs_r: the build_invs array used as to_merge or to_unref
+ * @idx_r: a stack variable of 'size_t', to store the build_invs index
+ * @cmp: a stack variable of 'int', to store the return value (-1, 0, or 1)
+ */
+#define arm_smmu_invs_for_each_cmp(invs_l, idx_l, invs_r, idx_r, cmp)         \
+	for (idx_l = idx_r = 0,                                                \
+	    cmp = arm_smmu_invs_iter_next_cmp(invs_l, 0, &(idx_l),             \
+					      invs_r, 0, &(idx_r));            \
+	     idx_l < invs_l->num_invs || idx_r < invs_r->num_invs;             \
+	     cmp = arm_smmu_invs_iter_next_cmp(                                \
+		     invs_l, idx_l + (cmp <= 0 ? 1 : 0), &(idx_l),             \
+		     invs_r, idx_r + (cmp >= 0 ? 1 : 0), &(idx_r)))
+
+/**
+ * arm_smmu_invs_merge() - Merge @to_merge into @invs and generate a new array
+ * @invs: the base invalidation array
+ * @to_merge: an array of invalidations to merge
+ *
+ * Return: a newly allocated array on success, or ERR_PTR
+ *
+ * This function must be locked and serialized with arm_smmu_invs_unref() and
+ * arm_smmu_invs_purge(), but it does not assert lockdep on any lock, so the
+ * KUNIT test can call it.
+ *
+ * Both @invs and @to_merge must be sorted, to ensure the returned array will
+ * be sorted as well.
+ *
+ * The caller is responsible for freeing @invs and the returned new array.
+ *
+ * Entries marked as trash are purged from the returned array.
+ */
+VISIBLE_IF_KUNIT
+struct arm_smmu_invs *arm_smmu_invs_merge(struct arm_smmu_invs *invs,
+					  struct arm_smmu_invs *to_merge)
+{
+	struct arm_smmu_invs *new_invs;
+	struct arm_smmu_inv *new;
+	size_t num_invs = 0;
+	size_t i, j;
+	int cmp;
+
+	arm_smmu_invs_for_each_cmp(invs, i, to_merge, j, cmp)
+		num_invs++;
+
+	new_invs = arm_smmu_invs_alloc(num_invs);
+	if (!new_invs)
+		return ERR_PTR(-ENOMEM);
+
+	new = new_invs->inv;
+	arm_smmu_invs_for_each_cmp(invs, i, to_merge, j, cmp) {
+		if (cmp < 0) {
+			*new = invs->inv[i];
+		} else if (cmp == 0) {
+			*new = invs->inv[i];
+			WRITE_ONCE(new->users, READ_ONCE(new->users) + 1);
+		} else {
+			*new = to_merge->inv[j];
+			WRITE_ONCE(new->users, 1);
+		}
+
+		/*
+		 * Check that the new array is sorted. This also validates that
+		 * to_merge is sorted.
+		 */
+		if (new != new_invs->inv)
+			WARN_ON_ONCE(arm_smmu_inv_cmp(new - 1, new) == 1);
+		if (arm_smmu_inv_is_ats(new))
+			new_invs->has_ats = true;
+		new++;
+	}
+
+	WARN_ON(new != new_invs->inv + new_invs->num_invs);
+
+	return new_invs;
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_merge);
+
+/**
+ * arm_smmu_invs_unref() - Find all entries of @to_unref in @invs and decrease
+ *                         their user counts without deleting them
+ * @invs: the base invalidation array
+ * @to_unref: an array of invalidations whose user counts should be decreased
+ *
+ * This function will not fail. Any entry whose users count drops to 0 is
+ * marked as trash, and the caller is notified about the trashed entry via
+ * @to_unref by setting its users to 0. The resulting trash entries are
+ * accounted in @invs->num_trashes for arm_smmu_invs_purge().
+ *
+ * All trailing trash entries in the array are dropped and the array size is
+ * trimmed accordingly. Trash entries in between remain in @invs until they are
+ * completely deleted by the next arm_smmu_invs_merge() or arm_smmu_invs_purge()
+ * call.
+ *
+ * This function must be locked and serialized with arm_smmu_invs_merge() and
+ * arm_smmu_invs_purge(), but it does not assert lockdep on any mutex, so the
+ * KUNIT test can call it.
+ *
+ * Note that the final @invs->num_invs might not reflect the actual number of
+ * invalidations due to trash entries. Any reader should take the read lock to
+ * iterate each entry and check its users counter up to the last entry.
+ */
+VISIBLE_IF_KUNIT
+void arm_smmu_invs_unref(struct arm_smmu_invs *invs,
+			 struct arm_smmu_invs *to_unref)
+{
+	unsigned long flags;
+	size_t num_invs = 0;
+	size_t i, j;
+	int cmp;
+
+	arm_smmu_invs_for_each_cmp(invs, i, to_unref, j, cmp) {
+		if (cmp < 0) {
+			/* not found in to_unref, leave alone */
+			num_invs = i + 1;
+		} else if (cmp == 0) {
+			int users = READ_ONCE(invs->inv[i].users) - 1;
+
+			if (WARN_ON(users < 0))
+				continue;
+
+			/* same item */
+			WRITE_ONCE(invs->inv[i].users, users);
+			if (users) {
+				WRITE_ONCE(to_unref->inv[j].users, 1);
+				num_invs = i + 1;
+				continue;
+			}
+
+			/* Notify the caller about the trash entry */
+			WRITE_ONCE(to_unref->inv[j].users, 0);
+			invs->num_trashes++;
+		} else {
+			/* item in to_unref is not in invs or already a trash */
+			WARN_ON(true);
+		}
+	}
+
+	/* Exclude any trailing trash */
+	invs->num_trashes -= invs->num_invs - num_invs;
+
+	/* The lock is required to fence concurrent ATS operations */
+	write_lock_irqsave(&invs->rwlock, flags);
+	WRITE_ONCE(invs->num_invs, num_invs); /* Drop trailing trash entries */
+	write_unlock_irqrestore(&invs->rwlock, flags);
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_unref);
+
+/**
+ * arm_smmu_invs_purge() - Purge all the trash entries in @invs
+ * @invs: the base invalidation array
+ *
+ * Return: a newly allocated array with all the trash entries removed, or NULL
+ *         if there is no trash entry in the array or if the allocation failed
+ *
+ * This function must be locked and serialized with arm_smmu_invs_merge() and
+ * arm_smmu_invs_unref(), but it does not assert lockdep on any lock, so the
+ * KUNIT test can call it.
+ *
+ * The caller is responsible for freeing @invs and the returned new array.
+ */
+VISIBLE_IF_KUNIT
+struct arm_smmu_invs *arm_smmu_invs_purge(struct arm_smmu_invs *invs)
+{
+	struct arm_smmu_invs *new_invs;
+	struct arm_smmu_inv *inv;
+	size_t i, num_invs = 0;
+
+	if (WARN_ON(invs->num_invs < invs->num_trashes))
+		return NULL;
+	if (!invs->num_invs || !invs->num_trashes)
+		return NULL;
+
+	new_invs = arm_smmu_invs_alloc(invs->num_invs - invs->num_trashes);
+	if (!new_invs)
+		return NULL;
+
+	arm_smmu_invs_for_each_entry(invs, i, inv) {
+		new_invs->inv[num_invs] = *inv;
+		if (arm_smmu_inv_is_ats(inv))
+			new_invs->has_ats = true;
+		num_invs++;
+	}
+
+	WARN_ON(num_invs != new_invs->num_invs);
+	return new_invs;
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_purge);
+
+/* Context descriptor manipulation functions */
 
 /*
  * Based on the value of ent report which bits of the STE the HW will access.
It···14881235{14891236 __le64 unused_update[NUM_ENTRY_QWORDS];14901237 u8 used_qword_diff;12381238+12391239+ /*12401240+ * Many of the entry structures have pointers to other structures that12411241+ * need to have their updates be visible before any writes of the entry12421242+ * happen.12431243+ */12441244+ dma_wmb();1491124514921246 used_qword_diff =14931247 arm_smmu_entry_qword_diff(writer, entry, target, unused_update);···25002240 return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);25012241}2502224225032503-int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,25042504- unsigned long iova, size_t size)25052505-{25062506- struct arm_smmu_master_domain *master_domain;25072507- int i;25082508- unsigned long flags;25092509- struct arm_smmu_cmdq_ent cmd = {25102510- .opcode = CMDQ_OP_ATC_INV,25112511- };25122512- struct arm_smmu_cmdq_batch cmds;25132513-25142514- if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))25152515- return 0;25162516-25172517- /*25182518- * Ensure that we've completed prior invalidation of the main TLBs25192519- * before we read 'nr_ats_masters' in case of a concurrent call to25202520- * arm_smmu_enable_ats():25212521- *25222522- * // unmap() // arm_smmu_enable_ats()25232523- * TLBI+SYNC atomic_inc(&nr_ats_masters);25242524- * smp_mb(); [...]25252525- * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()25262526- *25272527- * Ensures that we always see the incremented 'nr_ats_masters' count if25282528- * ATS was enabled at the PCI device before completion of the TLBI.25292529- */25302530- smp_mb();25312531- if (!atomic_read(&smmu_domain->nr_ats_masters))25322532- return 0;25332533-25342534- arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);25352535-25362536- spin_lock_irqsave(&smmu_domain->devices_lock, flags);25372537- list_for_each_entry(master_domain, &smmu_domain->devices,25382538- devices_elm) {25392539- struct arm_smmu_master *master = master_domain->master;25402540-25412541- if (!master->ats_enabled)25422542- continue;25432543-25442544- if (master_domain->nested_ats_flush) {25452545- /*25462546- * If a S2 used as a nesting parent is changed we have25472547- * no option but to completely flush the ATC.25482548- */25492549- arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);25502550- } else {25512551- arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,25522552- &cmd);25532553- }25542554-25552555- for (i = 0; i < master->num_streams; i++) {25562556- cmd.atc.sid = master->streams[i].id;25572557- arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);25582558- }25592559- }25602560- spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);25612561-25622562- return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);25632563-}25642564-25652243/* IO_PGTABLE API */25662244static void arm_smmu_tlb_inv_context(void *cookie)25672245{25682246 struct arm_smmu_domain *smmu_domain = cookie;25692569- struct arm_smmu_device *smmu = smmu_domain->smmu;25702570- struct arm_smmu_cmdq_ent cmd;2571224725722248 /*25732573- * NOTE: when io-pgtable is in non-strict mode, we may get here with25742574- * PTEs previously cleared by unmaps on the current CPU not yet visible25752575- * to the SMMU. We are relying on the dma_wmb() implicit during cmd25762576- * insertion to guarantee those are observed before the TLBI. Do be25772577- * careful, 007.22492249+ * If the DMA API is running in non-strict mode then another CPU could22502250+ * have changed the page table and not invoked any flush op. 
Instead the22512251+ * other CPU will do an atomic_read() and this CPU will have done an22522252+ * atomic_write(). That handshake is enough to acquire the page table22532253+ * writes from the other CPU.22542254+ *22552255+ * All command execution has a dma_wmb() to release all the in-memory22562256+ * structures written by this CPU, that barrier must also release the22572257+ * writes acquired from all the other CPUs too.22582258+ *22592259+ * There are other barriers and atomics on this path, but the above is22602260+ * the essential mechanism for ensuring that HW sees the page table22612261+ * writes from another CPU before it executes the IOTLB invalidation.25782262 */25792579- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {25802580- arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);25812581- } else {25822582- cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;25832583- cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;25842584- arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);25852585- }25862586- arm_smmu_atc_inv_domain(smmu_domain, 0, 0);22632263+ arm_smmu_domain_inv(smmu_domain);25872264}2588226525892589-static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,25902590- unsigned long iova, size_t size,25912591- size_t granule,25922592- struct arm_smmu_domain *smmu_domain)22662266+static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,22672267+ struct arm_smmu_cmdq_batch *cmds,22682268+ struct arm_smmu_cmdq_ent *cmd,22692269+ unsigned long iova, size_t size,22702270+ size_t granule, size_t pgsize)25932271{25942594- struct arm_smmu_device *smmu = smmu_domain->smmu;25952595- unsigned long end = iova + size, num_pages = 0, tg = 0;22722272+ unsigned long end = iova + size, num_pages = 0, tg = pgsize;25962273 size_t inv_range = granule;25972597- struct arm_smmu_cmdq_batch cmds;2598227425992599- if (!size)22752275+ if (WARN_ON_ONCE(!size))26002276 return;2601227726022278 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {26032603- /* Get the leaf page size */26042604- tg = __ffs(smmu_domain->domain.pgsize_bitmap);26052605-26062279 num_pages = size >> tg;2607228026082281 /* Convert page size of 12,14,16 (log2) to 1,2,3 */···25542361 else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)25552362 num_pages++;25562363 }25572557-25582558- arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);2559236425602365 while (iova < end) {25612366 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {···25822391 }2583239225842393 cmd->tlbi.addr = iova;25852585- arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);23942394+ arm_smmu_cmdq_batch_add(smmu, cmds, cmd);25862395 iova += inv_range;25872396 }25882588- arm_smmu_cmdq_batch_submit(smmu, &cmds);25892397}2590239825912591-static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,25922592- size_t granule, bool leaf,25932593- struct arm_smmu_domain *smmu_domain)23992399+static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size,24002400+ size_t granule)25942401{25952595- struct arm_smmu_cmdq_ent cmd = {25962596- .tlbi = {25972597- .leaf = leaf,25982598- },25992599- };24022402+ size_t max_tlbi_ops;2600240326012601- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {26022602- cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?26032603- CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;26042604- cmd.tlbi.asid = smmu_domain->cd.asid;26052605- } else {26062606- cmd.opcode = CMDQ_OP_TLBI_S2_IPA;26072607- cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;26082608- }26092609- __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);24042404+ /* 0 size means 
invalidate all */24052405+ if (!size || size == SIZE_MAX)24062406+ return true;2610240726112611- if (smmu_domain->nest_parent) {26122612- /*26132613- * When the S2 domain changes all the nested S1 ASIDs have to be26142614- * flushed too.26152615- */26162616- cmd.opcode = CMDQ_OP_TLBI_NH_ALL;26172617- arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);26182618- }24082408+ if (smmu->features & ARM_SMMU_FEAT_RANGE_INV)24092409+ return false;2619241026202411 /*26212621- * Unfortunately, this can't be leaf-only since we may have26222622- * zapped an entire table.24122412+ * Borrowed from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h,24132413+ * this is used as a threshold to replace "size_opcode" commands with a24142414+ * single "nsize_opcode" command, when SMMU doesn't implement the range24152415+ * invalidation feature, where there can be too many per-granule TLBIs,24162416+ * resulting in a soft lockup.26232417 */26242624- arm_smmu_atc_inv_domain(smmu_domain, iova, size);24182418+ max_tlbi_ops = 1 << (ilog2(granule) - 3);24192419+ return size >= max_tlbi_ops * granule;26252420}2626242126272627-void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,26282628- size_t granule, bool leaf,26292629- struct arm_smmu_domain *smmu_domain)24222422+/* Used by non INV_TYPE_ATS* invalidations */24232423+static void arm_smmu_inv_to_cmdq_batch(struct arm_smmu_inv *inv,24242424+ struct arm_smmu_cmdq_batch *cmds,24252425+ struct arm_smmu_cmdq_ent *cmd,24262426+ unsigned long iova, size_t size,24272427+ unsigned int granule)26302428{26312631- struct arm_smmu_cmdq_ent cmd = {26322632- .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?26332633- CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,26342634- .tlbi = {26352635- .asid = asid,26362636- .leaf = leaf,26372637- },26382638- };24292429+ if (arm_smmu_inv_size_too_big(inv->smmu, size, granule)) {24302430+ cmd->opcode = inv->nsize_opcode;24312431+ arm_smmu_cmdq_batch_add(inv->smmu, cmds, cmd);24322432+ return;24332433+ }2639243426402640- __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);24352435+ cmd->opcode = inv->size_opcode;24362436+ arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, iova, size, granule,24372437+ inv->pgsize);24382438+}24392439+24402440+static inline bool arm_smmu_invs_end_batch(struct arm_smmu_inv *cur,24412441+ struct arm_smmu_inv *next)24422442+{24432443+ /* Changing smmu means changing command queue */24442444+ if (cur->smmu != next->smmu)24452445+ return true;24462446+ /* The batch for S2 TLBI must be done before nested S1 ASIDs */24472447+ if (cur->type != INV_TYPE_S2_VMID_S1_CLEAR &&24482448+ next->type == INV_TYPE_S2_VMID_S1_CLEAR)24492449+ return true;24502450+ /* ATS must be after a sync of the S1/S2 invalidations */24512451+ if (!arm_smmu_inv_is_ats(cur) && arm_smmu_inv_is_ats(next))24522452+ return true;24532453+ return false;24542454+}24552455+24562456+static void __arm_smmu_domain_inv_range(struct arm_smmu_invs *invs,24572457+ unsigned long iova, size_t size,24582458+ unsigned int granule, bool leaf)24592459+{24602460+ struct arm_smmu_cmdq_batch cmds = {};24612461+ struct arm_smmu_inv *cur;24622462+ struct arm_smmu_inv *end;24632463+24642464+ cur = invs->inv;24652465+ end = cur + READ_ONCE(invs->num_invs);24662466+ /* Skip any leading entry marked as a trash */24672467+ for (; cur != end; cur++)24682468+ if (READ_ONCE(cur->users))24692469+ break;24702470+ while (cur != end) {24712471+ struct arm_smmu_device *smmu = cur->smmu;24722472+ struct arm_smmu_cmdq_ent cmd = {24732473+ 
/*24742474+ * Pick size_opcode to run arm_smmu_get_cmdq(). This can24752475+ * be changed to nsize_opcode, which would result in the24762476+ * same CMDQ pointer.24772477+ */24782478+ .opcode = cur->size_opcode,24792479+ };24802480+ struct arm_smmu_inv *next;24812481+24822482+ if (!cmds.num)24832483+ arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);24842484+24852485+ switch (cur->type) {24862486+ case INV_TYPE_S1_ASID:24872487+ cmd.tlbi.asid = cur->id;24882488+ cmd.tlbi.leaf = leaf;24892489+ arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,24902490+ granule);24912491+ break;24922492+ case INV_TYPE_S2_VMID:24932493+ cmd.tlbi.vmid = cur->id;24942494+ cmd.tlbi.leaf = leaf;24952495+ arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,24962496+ granule);24972497+ break;24982498+ case INV_TYPE_S2_VMID_S1_CLEAR:24992499+ /* CMDQ_OP_TLBI_S12_VMALL already flushed S1 entries */25002500+ if (arm_smmu_inv_size_too_big(cur->smmu, size, granule))25012501+ break;25022502+ cmd.tlbi.vmid = cur->id;25032503+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);25042504+ break;25052505+ case INV_TYPE_ATS:25062506+ arm_smmu_atc_inv_to_cmd(cur->ssid, iova, size, &cmd);25072507+ cmd.atc.sid = cur->id;25082508+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);25092509+ break;25102510+ case INV_TYPE_ATS_FULL:25112511+ arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);25122512+ cmd.atc.sid = cur->id;25132513+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);25142514+ break;25152515+ default:25162516+ WARN_ON_ONCE(1);25172517+ break;25182518+ }25192519+25202520+ /* Skip any trash entry in-between */25212521+ for (next = cur + 1; next != end; next++)25222522+ if (READ_ONCE(next->users))25232523+ break;25242524+25252525+ if (cmds.num &&25262526+ (next == end || arm_smmu_invs_end_batch(cur, next))) {25272527+ arm_smmu_cmdq_batch_submit(smmu, &cmds);25282528+ cmds.num = 0;25292529+ }25302530+ cur = next;25312531+ }25322532+}25332533+25342534+void arm_smmu_domain_inv_range(struct arm_smmu_domain *smmu_domain,25352535+ unsigned long iova, size_t size,25362536+ unsigned int granule, bool leaf)25372537+{25382538+ struct arm_smmu_invs *invs;25392539+25402540+ /*25412541+ * An invalidation request must follow some IOPTE change and then load25422542+ * an invalidation array. In the meantime, a domain attachment mutates25432543+ * the array and then stores an STE/CD asking SMMU HW to acquire those25442544+ * changed IOPTEs.25452545+ *25462546+ * When running alone, a domain attachment relies on the dma_wmb() in25472547+ * arm_smmu_write_entry() used by arm_smmu_install_ste_for_dev().25482548+ *25492549+ * But in a race, these two can be interdependent, making it a special25502550+ * case requiring an additional smp_mb() for the write->read ordering.25512551+ * Pairing with the dma_wmb() in arm_smmu_install_ste_for_dev(), this25522552+ * makes sure that IOPTE update prior to this point is visible to SMMU25532553+ * hardware before we load the updated invalidation array.25542554+ *25552555+ * [CPU0] | [CPU1]25562556+ * change IOPTE on new domain: |25572557+ * arm_smmu_domain_inv_range() { | arm_smmu_install_new_domain_invs()25582558+ * smp_mb(); // ensures IOPTE | arm_smmu_install_ste_for_dev {25592559+ * // seen by SMMU | dma_wmb(); // ensures invs update25602560+ * // load the updated invs | // before updating STE25612561+ * invs = rcu_dereference(); | STE = TTB0;25622562+ * ... 
| ...25632563+ * } | }25642564+ */25652565+ smp_mb();25662566+25672567+ rcu_read_lock();25682568+ invs = rcu_dereference(smmu_domain->invs);25692569+25702570+ /*25712571+ * Avoid locking unless ATS is being used. No ATC invalidation can be25722572+ * going on after a domain is detached.25732573+ */25742574+ if (invs->has_ats) {25752575+ unsigned long flags;25762576+25772577+ read_lock_irqsave(&invs->rwlock, flags);25782578+ __arm_smmu_domain_inv_range(invs, iova, size, granule, leaf);25792579+ read_unlock_irqrestore(&invs->rwlock, flags);25802580+ } else {25812581+ __arm_smmu_domain_inv_range(invs, iova, size, granule, leaf);25822582+ }25832583+25842584+ rcu_read_unlock();26412585}2642258626432587static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,···27882462static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,27892463 size_t granule, void *cookie)27902464{27912791- arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);24652465+ struct arm_smmu_domain *smmu_domain = cookie;24662466+24672467+ arm_smmu_domain_inv_range(smmu_domain, iova, size, granule, false);27922468}2793246927942470static const struct iommu_flush_ops arm_smmu_flush_ops = {···28222494 return true;28232495 case IOMMU_CAP_DIRTY_TRACKING:28242496 return arm_smmu_dbm_capable(master->smmu);24972497+ case IOMMU_CAP_PCI_ATS_SUPPORTED:24982498+ return arm_smmu_ats_supported(master);28252499 default:28262500 return false;28272501 }···28522522struct arm_smmu_domain *arm_smmu_domain_alloc(void)28532523{28542524 struct arm_smmu_domain *smmu_domain;25252525+ struct arm_smmu_invs *new_invs;2855252628562527 smmu_domain = kzalloc_obj(*smmu_domain);28572528 if (!smmu_domain)28582529 return ERR_PTR(-ENOMEM);2859253025312531+ new_invs = arm_smmu_invs_alloc(0);25322532+ if (!new_invs) {25332533+ kfree(smmu_domain);25342534+ return ERR_PTR(-ENOMEM);25352535+ }25362536+28602537 INIT_LIST_HEAD(&smmu_domain->devices);28612538 spin_lock_init(&smmu_domain->devices_lock);25392539+ rcu_assign_pointer(smmu_domain->invs, new_invs);2862254028632541 return smmu_domain;28642542}···28902552 ida_free(&smmu->vmid_map, cfg->vmid);28912553 }2892255428932893- kfree(smmu_domain);25552555+ arm_smmu_domain_free(smmu_domain);28942556}2895255728962558static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,···32082870 iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev);32092871}3210287228732873+static struct arm_smmu_inv *28742874+arm_smmu_master_build_inv(struct arm_smmu_master *master,28752875+ enum arm_smmu_inv_type type, u32 id, ioasid_t ssid,28762876+ size_t pgsize)28772877+{28782878+ struct arm_smmu_invs *build_invs = master->build_invs;28792879+ struct arm_smmu_inv *cur, inv = {28802880+ .smmu = master->smmu,28812881+ .type = type,28822882+ .id = id,28832883+ .pgsize = pgsize,28842884+ };28852885+28862886+ if (WARN_ON(build_invs->num_invs >= build_invs->max_invs))28872887+ return NULL;28882888+ cur = &build_invs->inv[build_invs->num_invs];28892889+ build_invs->num_invs++;28902890+28912891+ *cur = inv;28922892+ switch (type) {28932893+ case INV_TYPE_S1_ASID:28942894+ /*28952895+ * For S1 page tables the driver always uses VMID=0, and the28962896+ * invalidation logic for this type will set it as well.28972897+ */28982898+ if (master->smmu->features & ARM_SMMU_FEAT_E2H) {28992899+ cur->size_opcode = CMDQ_OP_TLBI_EL2_VA;29002900+ cur->nsize_opcode = CMDQ_OP_TLBI_EL2_ASID;29012901+ } else {29022902+ cur->size_opcode = CMDQ_OP_TLBI_NH_VA;29032903+ cur->nsize_opcode = 
CMDQ_OP_TLBI_NH_ASID;29042904+ }29052905+ break;29062906+ case INV_TYPE_S2_VMID:29072907+ cur->size_opcode = CMDQ_OP_TLBI_S2_IPA;29082908+ cur->nsize_opcode = CMDQ_OP_TLBI_S12_VMALL;29092909+ break;29102910+ case INV_TYPE_S2_VMID_S1_CLEAR:29112911+ cur->size_opcode = cur->nsize_opcode = CMDQ_OP_TLBI_NH_ALL;29122912+ break;29132913+ case INV_TYPE_ATS:29142914+ case INV_TYPE_ATS_FULL:29152915+ cur->size_opcode = cur->nsize_opcode = CMDQ_OP_ATC_INV;29162916+ cur->ssid = ssid;29172917+ break;29182918+ }29192919+29202920+ return cur;29212921+}29222922+29232923+/*29242924+ * Use the preallocated scratch array at master->build_invs, to build a to_merge29252925+ * or to_unref array, to pass into a following arm_smmu_invs_merge/unref() call.29262926+ *29272927+ * Do not free the returned invs array. It is reused, and will be overwritten by29282928+ * the next arm_smmu_master_build_invs() call.29292929+ */29302930+static struct arm_smmu_invs *29312931+arm_smmu_master_build_invs(struct arm_smmu_master *master, bool ats_enabled,29322932+ ioasid_t ssid, struct arm_smmu_domain *smmu_domain)29332933+{29342934+ const bool nesting = smmu_domain->nest_parent;29352935+ size_t pgsize = 0, i;29362936+29372937+ iommu_group_mutex_assert(master->dev);29382938+29392939+ master->build_invs->num_invs = 0;29402940+29412941+ /* Range-based invalidation requires the leaf pgsize for calculation */29422942+ if (master->smmu->features & ARM_SMMU_FEAT_RANGE_INV)29432943+ pgsize = __ffs(smmu_domain->domain.pgsize_bitmap);29442944+29452945+ switch (smmu_domain->stage) {29462946+ case ARM_SMMU_DOMAIN_SVA:29472947+ case ARM_SMMU_DOMAIN_S1:29482948+ if (!arm_smmu_master_build_inv(master, INV_TYPE_S1_ASID,29492949+ smmu_domain->cd.asid,29502950+ IOMMU_NO_PASID, pgsize))29512951+ return NULL;29522952+ break;29532953+ case ARM_SMMU_DOMAIN_S2:29542954+ if (!arm_smmu_master_build_inv(master, INV_TYPE_S2_VMID,29552955+ smmu_domain->s2_cfg.vmid,29562956+ IOMMU_NO_PASID, pgsize))29572957+ return NULL;29582958+ break;29592959+ default:29602960+ WARN_ON(true);29612961+ return NULL;29622962+ }29632963+29642964+ /* All the nested S1 ASIDs have to be flushed when S2 parent changes */29652965+ if (nesting) {29662966+ if (!arm_smmu_master_build_inv(29672967+ master, INV_TYPE_S2_VMID_S1_CLEAR,29682968+ smmu_domain->s2_cfg.vmid, IOMMU_NO_PASID, 0))29692969+ return NULL;29702970+ }29712971+29722972+ for (i = 0; ats_enabled && i < master->num_streams; i++) {29732973+ /*29742974+ * If an S2 used as a nesting parent is changed we have no29752975+ * option but to completely flush the ATC.29762976+ */29772977+ if (!arm_smmu_master_build_inv(29782978+ master, nesting ? INV_TYPE_ATS_FULL : INV_TYPE_ATS,29792979+ master->streams[i].id, ssid, 0))29802980+ return NULL;29812981+ }29822982+29832983+ /* Note this build_invs must have been sorted */29842984+29852985+ return master->build_invs;29862986+}29872987+32112988static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,32122989 struct iommu_domain *domain,32132990 ioasid_t ssid)···3350289733512898 arm_smmu_disable_iopf(master, master_domain);33522899 kfree(master_domain);29002900+}29012901+29022902+/*29032903+ * During attachment, the updates of the two domain->invs arrays are sequenced:29042904+ * 1. new domain updates its invs array, merging master->build_invs29052905+ * 2. new domain starts to include the master during its invalidation29062906+ * 3. master updates its STE switching from the old domain to the new domain29072907+ * 4. 
old domain still includes the master during its invalidation29082908+ * 5. old domain updates its invs array, unreferencing master->build_invs29092909+ *29102910+ * For 1 and 5, prepare the two updated arrays in advance, handling any changes29112911+ * that can possibly failure. So the actual update of either 1 or 5 won't fail.29122912+ * arm_smmu_asid_lock ensures that the old invs in the domains are intact while29132913+ * we are sequencing to update them.29142914+ */29152915+static int arm_smmu_attach_prepare_invs(struct arm_smmu_attach_state *state,29162916+ struct iommu_domain *new_domain)29172917+{29182918+ struct arm_smmu_domain *old_smmu_domain =29192919+ to_smmu_domain_devices(state->old_domain);29202920+ struct arm_smmu_domain *new_smmu_domain =29212921+ to_smmu_domain_devices(new_domain);29222922+ struct arm_smmu_master *master = state->master;29232923+ ioasid_t ssid = state->ssid;29242924+29252925+ /*29262926+ * At this point a NULL domain indicates the domain doesn't use the29272927+ * IOTLB, see to_smmu_domain_devices().29282928+ */29292929+ if (new_smmu_domain) {29302930+ struct arm_smmu_inv_state *invst = &state->new_domain_invst;29312931+ struct arm_smmu_invs *build_invs;29322932+29332933+ invst->invs_ptr = &new_smmu_domain->invs;29342934+ invst->old_invs = rcu_dereference_protected(29352935+ new_smmu_domain->invs,29362936+ lockdep_is_held(&arm_smmu_asid_lock));29372937+ build_invs = arm_smmu_master_build_invs(29382938+ master, state->ats_enabled, ssid, new_smmu_domain);29392939+ if (!build_invs)29402940+ return -EINVAL;29412941+29422942+ invst->new_invs =29432943+ arm_smmu_invs_merge(invst->old_invs, build_invs);29442944+ if (IS_ERR(invst->new_invs))29452945+ return PTR_ERR(invst->new_invs);29462946+ }29472947+29482948+ if (old_smmu_domain) {29492949+ struct arm_smmu_inv_state *invst = &state->old_domain_invst;29502950+29512951+ invst->invs_ptr = &old_smmu_domain->invs;29522952+ /* A re-attach case might have a different ats_enabled state */29532953+ if (new_smmu_domain == old_smmu_domain)29542954+ invst->old_invs = state->new_domain_invst.new_invs;29552955+ else29562956+ invst->old_invs = rcu_dereference_protected(29572957+ old_smmu_domain->invs,29582958+ lockdep_is_held(&arm_smmu_asid_lock));29592959+ /* For old_smmu_domain, new_invs points to master->build_invs */29602960+ invst->new_invs = arm_smmu_master_build_invs(29612961+ master, master->ats_enabled, ssid, old_smmu_domain);29622962+ }29632963+29642964+ return 0;29652965+}29662966+29672967+/* Must be installed before arm_smmu_install_ste_for_dev() */29682968+static void29692969+arm_smmu_install_new_domain_invs(struct arm_smmu_attach_state *state)29702970+{29712971+ struct arm_smmu_inv_state *invst = &state->new_domain_invst;29722972+29732973+ if (!invst->invs_ptr)29742974+ return;29752975+29762976+ rcu_assign_pointer(*invst->invs_ptr, invst->new_invs);29772977+ kfree_rcu(invst->old_invs, rcu);29782978+}29792979+29802980+static void arm_smmu_inv_flush_iotlb_tag(struct arm_smmu_inv *inv)29812981+{29822982+ struct arm_smmu_cmdq_ent cmd = {};29832983+29842984+ switch (inv->type) {29852985+ case INV_TYPE_S1_ASID:29862986+ cmd.tlbi.asid = inv->id;29872987+ break;29882988+ case INV_TYPE_S2_VMID:29892989+ /* S2_VMID using nsize_opcode covers S2_VMID_S1_CLEAR */29902990+ cmd.tlbi.vmid = inv->id;29912991+ break;29922992+ default:29932993+ return;29942994+ }29952995+29962996+ cmd.opcode = inv->nsize_opcode;29972997+ arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, &cmd);29982998+}29992999+30003000+/* Should be installed after 
arm_smmu_install_ste_for_dev() */30013001+static void30023002+arm_smmu_install_old_domain_invs(struct arm_smmu_attach_state *state)30033003+{30043004+ struct arm_smmu_inv_state *invst = &state->old_domain_invst;30053005+ struct arm_smmu_invs *old_invs = invst->old_invs;30063006+ struct arm_smmu_invs *new_invs;30073007+30083008+ lockdep_assert_held(&arm_smmu_asid_lock);30093009+30103010+ if (!invst->invs_ptr)30113011+ return;30123012+30133013+ arm_smmu_invs_unref(old_invs, invst->new_invs);30143014+ /*30153015+ * When an IOTLB tag (the first entry in invs->new_invs) is no longer used,30163016+ * it means the ASID or VMID will no longer be invalidated by map/unmap and30173017+ * must be cleaned right now. The rule is that any ASID/VMID not in an invs30183018+ * array must be left cleared in the IOTLB.30193019+ */30203020+ if (!READ_ONCE(invst->new_invs->inv[0].users))30213021+ arm_smmu_inv_flush_iotlb_tag(&invst->new_invs->inv[0]);30223022+30233023+ new_invs = arm_smmu_invs_purge(old_invs);30243024+ if (!new_invs)30253025+ return;30263026+30273027+ rcu_assign_pointer(*invst->invs_ptr, new_invs);30283028+ kfree_rcu(old_invs, rcu);33533029}3354303033553031/*···35382956 arm_smmu_ats_supported(master);35392957 }3540295829592959+ ret = arm_smmu_attach_prepare_invs(state, new_domain);29602960+ if (ret)29612961+ return ret;29622962+35412963 if (smmu_domain) {35422964 if (new_domain->type == IOMMU_DOMAIN_NESTED) {35432965 ret = arm_smmu_attach_prepare_vmaster(35442966 state, to_smmu_nested_domain(new_domain));35452967 if (ret)35463546- return ret;29682968+ goto err_unprepare_invs;35472969 }3548297035492971 master_domain = kzalloc_obj(*master_domain);···35953009 atomic_inc(&smmu_domain->nr_ats_masters);35963010 list_add(&master_domain->devices_elm, &smmu_domain->devices);35973011 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);30123012+30133013+ arm_smmu_install_new_domain_invs(state);35983014 }3599301536003016 if (!state->ats_enabled && master->ats_enabled) {···36163028 kfree(master_domain);36173029err_free_vmaster:36183030 kfree(state->vmaster);30313031+err_unprepare_invs:30323032+ kfree(state->new_domain_invst.new_invs);36193033 return ret;36203034}36213035···36493059 }3650306036513061 arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);30623062+ arm_smmu_install_old_domain_invs(state);36523063 master->ats_enabled = state->ats_enabled;36533064}36543065···37153124 state.ats_enabled);37163125 arm_smmu_install_ste_for_dev(master, &target);37173126 arm_smmu_clear_cd(master, IOMMU_NO_PASID);31273127+ break;31283128+ default:31293129+ WARN_ON(true);37183130 break;37193131 }37203132···38323238{38333239 struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain);38343240 struct arm_smmu_master *master = dev_iommu_priv_get(dev);32413241+ struct arm_smmu_attach_state state = {32423242+ .master = master,32433243+ .old_domain = old_domain,32443244+ .ssid = pasid,32453245+ };3835324638363247 mutex_lock(&arm_smmu_asid_lock);32483248+ arm_smmu_attach_prepare_invs(&state, NULL);38373249 arm_smmu_clear_cd(master, pasid);38383250 if (master->ats_enabled)38393251 arm_smmu_atc_inv_master(master, pasid);38403252 arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);32533253+ arm_smmu_install_old_domain_invs(&state);38413254 mutex_unlock(&arm_smmu_asid_lock);3842325538433256 /*···40183417 return &smmu_domain->domain;4019341840203419err_free:40214021- kfree(smmu_domain);34203420+ arm_smmu_domain_free(smmu_domain);40223421 return ERR_PTR(ret);40233422}40243423···40633462 if 
(!gather->pgsize)40643463 return;4065346440664066- arm_smmu_tlb_inv_range_domain(gather->start,40674067- gather->end - gather->start + 1,40684068- gather->pgsize, true, smmu_domain);34653465+ arm_smmu_domain_inv_range(smmu_domain, gather->start,34663466+ gather->end - gather->start + 1,34673467+ gather->pgsize, true);40693468}4070346940713470static phys_addr_t···41103509 return 0;41113510}4112351135123512+static int arm_smmu_stream_id_cmp(const void *_l, const void *_r)35133513+{35143514+ const typeof_member(struct arm_smmu_stream, id) *l = _l;35153515+ const typeof_member(struct arm_smmu_stream, id) *r = _r;35163516+35173517+ return cmp_int(*l, *r);35183518+}35193519+41133520static int arm_smmu_insert_master(struct arm_smmu_device *smmu,41143521 struct arm_smmu_master *master)41153522{41163523 int i;41173524 int ret = 0;41183525 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);35263526+ bool ats_supported = dev_is_pci(master->dev) &&35273527+ pci_ats_supported(to_pci_dev(master->dev));4119352841203529 master->streams = kzalloc_objs(*master->streams, fwspec->num_ids);41213530 if (!master->streams)41223531 return -ENOMEM;41233532 master->num_streams = fwspec->num_ids;4124353335343534+ if (!ats_supported) {35353535+ /* Base case has 1 ASID entry or maximum 2 VMID entries */35363536+ master->build_invs = arm_smmu_invs_alloc(2);35373537+ } else {35383538+ /* ATS case adds num_ids of entries, on top of the base case */35393539+ master->build_invs = arm_smmu_invs_alloc(2 + fwspec->num_ids);35403540+ }35413541+ if (!master->build_invs) {35423542+ kfree(master->streams);35433543+ return -ENOMEM;35443544+ }35453545+35463546+ for (i = 0; i < fwspec->num_ids; i++) {35473547+ struct arm_smmu_stream *new_stream = &master->streams[i];35483548+35493549+ new_stream->id = fwspec->ids[i];35503550+ new_stream->master = master;35513551+ }35523552+35533553+ /* Put the ids into order for sorted to_merge/to_unref arrays */35543554+ sort_nonatomic(master->streams, master->num_streams,35553555+ sizeof(master->streams[0]), arm_smmu_stream_id_cmp,35563556+ NULL);35573557+41253558 mutex_lock(&smmu->streams_mutex);41263559 for (i = 0; i < fwspec->num_ids; i++) {41273560 struct arm_smmu_stream *new_stream = &master->streams[i];41283561 struct rb_node *existing;41294129- u32 sid = fwspec->ids[i];41304130-41314131- new_stream->id = sid;41324132- new_stream->master = master;35623562+ u32 sid = new_stream->id;4133356341343564 ret = arm_smmu_init_sid_strtab(smmu, sid);41353565 if (ret)···41903558 for (i--; i >= 0; i--)41913559 rb_erase(&master->streams[i].node, &smmu->streams);41923560 kfree(master->streams);35613561+ kfree(master->build_invs);41933562 }41943563 mutex_unlock(&smmu->streams_mutex);41953564···42123579 mutex_unlock(&smmu->streams_mutex);4213358042143581 kfree(master->streams);35823582+ kfree(master->build_invs);42153583}4216358442173585static struct iommu_device *arm_smmu_probe_device(struct device *dev)···49424308#define IIDR_IMPLEMENTER_ARM 0x43b49434309#define IIDR_PRODUCTID_ARM_MMU_600 0x48349444310#define IIDR_PRODUCTID_ARM_MMU_700 0x48743114311+#define IIDR_PRODUCTID_ARM_MMU_L1 0x48a43124312+#define IIDR_PRODUCTID_ARM_MMU_S3 0x4984945431349464314static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)49474315{···49684332 smmu->features &= ~ARM_SMMU_FEAT_NESTING;49694333 break;49704334 case IIDR_PRODUCTID_ARM_MMU_700:49714971- /* Arm erratum 2812531 */43354335+ /* Many errata... 
 */
 		smmu->features &= ~ARM_SMMU_FEAT_BTM;
-		smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
-		/* Arm errata 2268618, 2812531 */
-		smmu->features &= ~ARM_SMMU_FEAT_NESTING;
+		if (variant < 1 || revision < 1) {
+			/* Arm erratum 2812531 */
+			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
+			/* Arm errata 2268618, 2812531 */
+			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
+		}
+		break;
+	case IIDR_PRODUCTID_ARM_MMU_L1:
+	case IIDR_PRODUCTID_ARM_MMU_S3:
+		/* Arm errata 3878312/3995052 */
+		smmu->features &= ~ARM_SMMU_FEAT_BTM;
 		break;
 	}
 	break;
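For reference, the threshold arithmetic behind arm_smmu_inv_size_too_big() above: with a 4KiB granule, ilog2(granule) is 12, so max_tlbi_ops = 1 << (12 - 3) = 512, and any invalidation covering at least 512 * 4KiB = 2MiB is turned into a single "nsize_opcode" (invalidate-all-by-ASID/VMID) command instead of per-granule TLBIs. A minimal standalone sketch of the same rule, in userspace C with simplified names (illustrative only, not part of the patch):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Same threshold rule, without the SMMU feature-bit checks */
	static bool inv_size_too_big(size_t size, size_t granule)
	{
		size_t max_tlbi_ops;

		/* 0 or SIZE_MAX means "invalidate all" */
		if (!size || size == SIZE_MAX)
			return true;

		/* granule >> 3 == 1 << (ilog2(granule) - 3) for power-of-two granules */
		max_tlbi_ops = granule >> 3;
		return size >= max_tlbi_ops * granule;
	}

	int main(void)
	{
		/* 4KiB granule: the threshold is 512 * 4KiB = 2MiB */
		printf("1MiB: %d\n", inv_size_too_big(1 << 20, 4096)); /* 0: per-page TLBIs */
		printf("2MiB: %d\n", inv_size_too_big(2 << 20, 4096)); /* 1: one full TLBI */
		return 0;
	}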
+136-6
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
···
 	int num;
 };
 
+/*
+ * The order here also determines the sequence in which commands are sent to
+ * the command queue. E.g. TLBI must be done before ATC_INV.
+ */
+enum arm_smmu_inv_type {
+	INV_TYPE_S1_ASID,
+	INV_TYPE_S2_VMID,
+	INV_TYPE_S2_VMID_S1_CLEAR,
+	INV_TYPE_ATS,
+	INV_TYPE_ATS_FULL,
+};
+
+struct arm_smmu_inv {
+	struct arm_smmu_device *smmu;
+	u8 type;
+	u8 size_opcode;
+	u8 nsize_opcode;
+	u32 id;				/* ASID or VMID or SID */
+	union {
+		size_t pgsize;		/* ARM_SMMU_FEAT_RANGE_INV */
+		u32 ssid;		/* INV_TYPE_ATS */
+	};
+
+	int users;	/* users=0 marks a trash entry to be purged */
+};
+
+static inline bool arm_smmu_inv_is_ats(const struct arm_smmu_inv *inv)
+{
+	return inv->type == INV_TYPE_ATS || inv->type == INV_TYPE_ATS_FULL;
+}
+
+/**
+ * struct arm_smmu_invs - Per-domain invalidation array
+ * @max_invs: maximum capacity of the flexible array
+ * @num_invs: number of invalidations in the flexible array. May be smaller
+ *            than @max_invs after a trailing trash entry is excluded, but must
+ *            not be greater than @max_invs
+ * @num_trashes: number of trash entries in the array for
+ *               arm_smmu_invs_purge(). Must not be greater than @num_invs
+ * @rwlock: optional rwlock to fence ATS operations
+ * @has_ats: flag if the array contains an INV_TYPE_ATS or INV_TYPE_ATS_FULL
+ * @rcu: rcu head for kfree_rcu()
+ * @inv: flexible invalidation array
+ *
+ * The arm_smmu_invs is an RCU data structure. During an ->attach_dev callback,
+ * arm_smmu_invs_merge(), arm_smmu_invs_unref() and arm_smmu_invs_purge() will
+ * be used to allocate a new copy of an old array for addition and deletion in
+ * the old domain's and new domain's invs arrays.
+ *
+ * arm_smmu_invs_unref() mutates a given array, internally reducing the users
+ * counts of the given entries. This exists to support a no-fail routine like
+ * attaching to an IOMMU_DOMAIN_BLOCKED. It can be paired with a follow-up
+ * arm_smmu_invs_purge() call to generate a new, clean array.
+ *
+ * A concurrent invalidation thread pushes every invalidation described in the
+ * array onto the command queue for each invalidation event. It is designed
+ * like this to optimize the invalidation fast path by avoiding locks.
+ *
+ * A domain can be shared across SMMU instances. When an instance gets removed,
+ * it deletes all the entries that belong to that SMMU instance. Then a
+ * synchronize_rcu() has to be called to sync the array, to prevent any
+ * concurrent invalidation thread still accessing the old array from issuing
+ * commands to the command queue of the removed SMMU instance.
+ */
+struct arm_smmu_invs {
+	size_t max_invs;
+	size_t num_invs;
+	size_t num_trashes;
+	rwlock_t rwlock;
+	bool has_ats;
+	struct rcu_head rcu;
+	struct arm_smmu_inv inv[] __counted_by(max_invs);
+};
+
+static inline struct arm_smmu_invs *arm_smmu_invs_alloc(size_t num_invs)
+{
+	struct arm_smmu_invs *new_invs;
+
+	new_invs = kzalloc(struct_size(new_invs, inv, num_invs), GFP_KERNEL);
+	if (!new_invs)
+		return NULL;
+	new_invs->max_invs = num_invs;
+	new_invs->num_invs = num_invs;
+	rwlock_init(&new_invs->rwlock);
+	return new_invs;
+}
+
 struct arm_smmu_evtq {
 	struct arm_smmu_queue q;
 	struct iopf_queue *iopf;
···
 	struct arm_smmu_device *smmu;
 	struct device *dev;
 	struct arm_smmu_stream *streams;
+	/*
+	 * Scratch memory for a to_merge or to_unref array to build a
+	 * per-domain invalidation array. It is pre-allocated with enough
+	 * entries for all possible build scenarios. It can be used by only one
+	 * caller at a time, until arm_smmu_invs_merge/unref() finishes. Must
+	 * be locked by the iommu_group mutex.
+	 */
+	struct arm_smmu_invs *build_invs;
 	struct arm_smmu_vmaster *vmaster; /* use smmu->streams_mutex */
 	/* Locked by the iommu core using the group mutex */
 	struct arm_smmu_ctx_desc_cfg cd_table;
···
 enum arm_smmu_domain_stage {
 	ARM_SMMU_DOMAIN_S1 = 0,
 	ARM_SMMU_DOMAIN_S2,
+	ARM_SMMU_DOMAIN_SVA,
 };
 
 struct arm_smmu_domain {
···
 	};
 
 	struct iommu_domain domain;
+
+	struct arm_smmu_invs __rcu *invs;
 
 	/* List of struct arm_smmu_master_domain */
 	struct list_head devices;
···
 void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
 			  struct arm_smmu_master *master, struct mm_struct *mm,
 			  u16 asid);
+
+struct arm_smmu_invs *arm_smmu_invs_merge(struct arm_smmu_invs *invs,
+					  struct arm_smmu_invs *to_merge);
+void arm_smmu_invs_unref(struct arm_smmu_invs *invs,
+			 struct arm_smmu_invs *to_unref);
+struct arm_smmu_invs *arm_smmu_invs_purge(struct arm_smmu_invs *invs);
 #endif
 
 struct arm_smmu_master_domain {
···
 
 struct arm_smmu_domain *arm_smmu_domain_alloc(void);
 
+static inline void arm_smmu_domain_free(struct arm_smmu_domain *smmu_domain)
+{
+	/* No concurrency with invalidation is possible at this point */
+	kfree(rcu_dereference_protected(smmu_domain->invs, true));
+	kfree(smmu_domain);
+}
+
 void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid);
 struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
 					 u32 ssid);
···
 			     struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
 			     struct arm_smmu_cd *cd, struct iommu_domain *old);
 
-void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
-void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
-				 size_t granule, bool leaf,
-				 struct arm_smmu_domain *smmu_domain);
-int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
-			    unsigned long iova, size_t size);
+void arm_smmu_domain_inv_range(struct arm_smmu_domain *smmu_domain,
+			       unsigned long iova, size_t size,
+			       unsigned int granule, bool leaf);
+
+static inline void arm_smmu_domain_inv(struct arm_smmu_domain *smmu_domain)
+{
+	arm_smmu_domain_inv_range(smmu_domain, 0, 0, 0, false);
+}
 
 void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
 			      struct arm_smmu_cmdq *cmdq);
···
 	       IOMMU_FWSPEC_PCI_RC_CANWBS;
 }
 
+/**
+ * struct arm_smmu_inv_state - Per-domain invalidation array state
+ * @invs_ptr: points to the domain->invs (unwinding nesting/etc.) or is NULL if
+ *            no change should be made
+ * @old_invs: the original invs array
+ * @new_invs: for the new domain, this is the new invs array to update
+ *            domain->invs; for the old domain, this is the master->build_invs
+ *            to pass in as the to_unref argument to an arm_smmu_invs_unref()
+ *            call
+ */
+struct arm_smmu_inv_state {
+	struct arm_smmu_invs __rcu **invs_ptr;
+	struct arm_smmu_invs *old_invs;
+	struct arm_smmu_invs *new_invs;
+};
+
 struct arm_smmu_attach_state {
 	/* Inputs */
 	struct iommu_domain *old_domain;
···
 	ioasid_t ssid;
 	/* Resulting state */
 	struct arm_smmu_vmaster *vmaster;
+	struct arm_smmu_inv_state old_domain_invst;
+	struct arm_smmu_inv_state new_domain_invst;
 	bool ats_enabled;
 };
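To make the merge/unref life cycle documented above easier to follow, here is a minimal userspace sketch of the same idea: two sorted arrays are walked with two cursors, matching entries share one slot with a bumped users count, and entries whose count drops to zero become trash. The types and helpers are simplified stand-ins, not the driver's real arm_smmu_inv/arm_smmu_invs structures:

	#include <stddef.h>
	#include <stdio.h>

	struct inv { int key; int users; };	/* stand-in for struct arm_smmu_inv */

	/* Merge two sorted arrays; equal keys share one entry with users incremented */
	static size_t merge(const struct inv *a, size_t na,
			    const struct inv *b, size_t nb, struct inv *out)
	{
		size_t i = 0, j = 0, n = 0;

		while (i < na || j < nb) {
			if (j >= nb || (i < na && a[i].key < b[j].key)) {
				out[n++] = a[i++];		/* only in a */
			} else if (i >= na || b[j].key < a[i].key) {
				out[n] = b[j++];		/* only in b */
				out[n++].users = 1;
			} else {
				out[n] = a[i++];		/* in both: refcount */
				out[n++].users++;
				j++;
			}
		}
		return n;
	}

	/* Decrease users for every key in b; an entry left with users==0 is "trash" */
	static void unref(struct inv *a, size_t na, const struct inv *b, size_t nb)
	{
		size_t i = 0, j = 0;

		while (i < na && j < nb) {
			if (a[i].key < b[j].key)
				i++;
			else if (b[j].key < a[i].key)
				j++;		/* would be a WARN in the driver */
			else
				a[i++].users--, j++;
		}
	}

	int main(void)
	{
		struct inv a[] = { { 1, 1 }, { 3, 1 } };
		struct inv b[] = { { 2, 0 }, { 3, 0 } };
		struct inv out[4];
		size_t n = merge(a, 2, b, 2, out);

		for (size_t i = 0; i < n; i++)		/* prints 1:1 2:1 3:2 */
			printf("%d:%d ", out[i].key, out[i].users);
		printf("\n");

		unref(out, n, b, 2);
		for (size_t i = 0; i < n; i++)		/* prints 1:1 2:0 3:1 */
			printf("%d:%d ", out[i].key, out[i].users);
		printf("\n");
		return 0;
	}

The real code keeps both passes O(n) by requiring both arrays to be sorted, which is why master->build_invs is built from stream IDs sorted at probe time.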
+4-3
drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
···
 	/* Reset VCMDQ */
 	tegra241_vcmdq_hw_deinit(vcmdq);
 
+	/* vintf->hyp_own is a HW state finalized in tegra241_vintf_hw_init() */
+	if (!vcmdq->vintf->hyp_own)
+		vcmdq->cmdq.supports_cmd = tegra241_guest_vcmdq_supports_cmd;
+
 	/* Configure and enable VCMDQ */
 	writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE));
···
 	/* ...override q_base to write VCMDQ_BASE registers */
 	q->q_base = q->base_dma & VCMDQ_ADDR;
 	q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift);
-
-	if (!vcmdq->vintf->hyp_own)
-		cmdq->supports_cmd = tegra241_guest_vcmdq_supports_cmd;
 
 	return arm_smmu_cmdq_init(smmu, cmdq);
 }
+12-1
drivers/iommu/dma-iommu.c
···
 #include <linux/device.h>
 #include <linux/dma-direct.h>
 #include <linux/dma-map-ops.h>
+#include <linux/generic_pt/iommu.h>
 #include <linux/gfp.h>
 #include <linux/huge_mm.h>
 #include <linux/iommu.h>
···
 	}
 }
 
+static bool iommu_domain_supports_fq(struct device *dev,
+				     struct iommu_domain *domain)
+{
+	/* iommupt always supports DMA-FQ */
+	if (iommupt_from_domain(domain))
+		return true;
+	return device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH);
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
···
 
 	/* If the FQ fails we can simply fall back to strict mode */
 	if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
-	    (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
+	    (!iommu_domain_supports_fq(dev, domain) ||
+	     iommu_dma_init_fq(domain)))
 		domain->type = IOMMU_DOMAIN_DMA;
 
 	return iova_reserve_iommu_regions(dev, domain);
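A condensed view of the fallback decision in the hunk above, using stand-in types rather than the kernel API (illustrative only): a DMA-FQ request only sticks when the domain either is an iommupt domain or advertises IOMMU_CAP_DEFERRED_FLUSH, and when flush-queue init succeeds; otherwise the domain silently degrades to strict DMA.

	#include <stdbool.h>
	#include <stdio.h>

	enum domain_type { DOMAIN_DMA, DOMAIN_DMA_FQ };

	/* Mirrors the hunk above: FQ needs iommupt or IOMMU_CAP_DEFERRED_FLUSH */
	static enum domain_type pick_type(bool wants_fq, bool is_iommupt,
					  bool has_deferred_flush_cap,
					  bool fq_init_ok)
	{
		bool supports_fq = is_iommupt || has_deferred_flush_cap;

		if (wants_fq && supports_fq && fq_init_ok)
			return DOMAIN_DMA_FQ;
		return DOMAIN_DMA;	/* fall back to strict mode */
	}

	int main(void)
	{
		/* iommupt domains may use DMA-FQ even without the capability flag */
		printf("%d\n", pick_type(true, true, false, true));  /* 1: DMA_FQ */
		printf("%d\n", pick_type(true, false, false, true)); /* 0: strict DMA */
		return 0;
	}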
···
 
 	  Selected automatically by an IOMMU driver that uses this format.
 
+config IOMMU_PT_RISCV64
+	tristate "IOMMU page table for RISC-V 64 bit Sv57/Sv48/Sv39"
+	depends on !GENERIC_ATOMIC64 # for cmpxchg64
+	help
+	  iommu_domain implementation for RISC-V 64 bit 3/4/5 level page table.
+	  It supports 4K/2M/1G/512G/256T page sizes and can decode a sign
+	  extended portion of the 64 bit IOVA space.
+
+	  Selected automatically by an IOMMU driver that uses this format.
+
 config IOMMU_PT_X86_64
 	tristate "IOMMU page table for x86 64-bit, 4/5 levels"
 	depends on !GENERIC_ATOMIC64 # for cmpxchg64
···
 	tristate "IOMMU Page Table KUnit Test" if !KUNIT_ALL_TESTS
 	depends on KUNIT
 	depends on IOMMU_PT_AMDV1 || !IOMMU_PT_AMDV1
+	depends on IOMMU_PT_RISCV64 || !IOMMU_PT_RISCV64
 	depends on IOMMU_PT_X86_64 || !IOMMU_PT_X86_64
 	depends on IOMMU_PT_VTDSS || !IOMMU_PT_VTDSS
 	default KUNIT_ALL_TESTS
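The page sizes listed in the new Kconfig entry follow the usual 512-entries-per-level progression of the RISC-V Sv39/Sv48/Sv57 formats: each level adds 9 bits, so the leaf sizes are 4K << (9 * level). A quick sketch of that arithmetic (standalone, not part of the patch):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* 4K, 2M, 1G, 512G, 256T: one leaf size per table level */
		for (int level = 0; level < 5; level++) {
			uint64_t pgsize = 4096ULL << (9 * level);

			printf("level %d: %llu bytes\n", level,
			       (unsigned long long)pgsize);
		}
		return 0;
	}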
···5151 iommu_pages_stop_incoherent_list(free_list,5252 iommu_table->iommu_device);53535454- if (pt_feature(common, PT_FEAT_FLUSH_RANGE_NO_GAPS) &&5555- iommu_iotlb_gather_is_disjoint(iotlb_gather, iova, len)) {5656- iommu_iotlb_sync(&iommu_table->domain, iotlb_gather);5757- /*5858- * Note that the sync frees the gather's free list, so we must5959- * not have any pages on that list that are covered by iova/len6060- */5454+ /*5555+ * If running in DMA-FQ mode then the unmap will be followed by an IOTLB5656+ * flush all so we need to optimize by never flushing the IOTLB here.5757+ *5858+ * For NO_GAPS the user gets to pick if flushing all or doing micro5959+ * flushes is better for their work load by choosing DMA vs DMA-FQ6060+ * operation. Drivers should also see shadow_on_flush.6161+ */6262+ if (!iommu_iotlb_gather_queued(iotlb_gather)) {6363+ if (pt_feature(common, PT_FEAT_FLUSH_RANGE_NO_GAPS) &&6464+ iommu_iotlb_gather_is_disjoint(iotlb_gather, iova, len)) {6565+ iommu_iotlb_sync(&iommu_table->domain, iotlb_gather);6666+ /*6767+ * Note that the sync frees the gather's free list, so6868+ * we must not have any pages on that list that are6969+ * covered by iova/len7070+ */7171+ }7272+ iommu_iotlb_gather_add_range(iotlb_gather, iova, len);6173 }62746363- iommu_iotlb_gather_add_range(iotlb_gather, iova, len);6475 iommu_pages_list_splice(free_list, &iotlb_gather->freelist);6576}6677···477466 pt_oaddr_t oa;478467 unsigned int leaf_pgsize_lg2;479468 unsigned int leaf_level;469469+ pt_vaddr_t num_leaves;480470};481471482472/*···530518static int __map_range_leaf(struct pt_range *range, void *arg,531519 unsigned int level, struct pt_table_p *table)532520{521521+ struct pt_iommu *iommu_table = iommu_from_common(range->common);533522 struct pt_state pts = pt_init(range, level, table);534523 struct pt_iommu_map_args *map = arg;535524 unsigned int leaf_pgsize_lg2 = map->leaf_pgsize_lg2;536525 unsigned int start_index;537526 pt_oaddr_t oa = map->oa;527527+ unsigned int num_leaves;528528+ unsigned int orig_end;529529+ pt_vaddr_t last_va;538530 unsigned int step;539531 bool need_contig;540532 int ret = 0;···552536553537 _pt_iter_first(&pts);554538 start_index = pts.index;539539+ orig_end = pts.end_index;540540+ if (pts.index + map->num_leaves < pts.end_index) {541541+ /* Need to stop in the middle of the table to change sizes */542542+ pts.end_index = pts.index + map->num_leaves;543543+ num_leaves = 0;544544+ } else {545545+ num_leaves = map->num_leaves - (pts.end_index - pts.index);546546+ }547547+555548 do {556549 pts.type = pt_load_entry_raw(&pts);557550 if (pts.type != PT_ENTRY_EMPTY || need_contig) {···586561 flush_writes_range(&pts, start_index, pts.index);587562588563 map->oa = oa;589589- return ret;564564+ map->num_leaves = num_leaves;565565+ if (ret || num_leaves)566566+ return ret;567567+568568+ /* range->va is not valid if we reached the end of the table */569569+ pts.index -= step;570570+ pt_index_to_va(&pts);571571+ pts.index += step;572572+ last_va = range->va + log2_to_int(leaf_pgsize_lg2);573573+574574+ if (last_va - 1 == range->last_va) {575575+ PT_WARN_ON(pts.index != orig_end);576576+ return 0;577577+ }578578+579579+ /*580580+ * Reached a point where the page size changed, compute the new581581+ * parameters.582582+ */583583+ map->leaf_pgsize_lg2 = pt_compute_best_pgsize(584584+ iommu_table->domain.pgsize_bitmap, last_va, range->last_va, oa);585585+ map->leaf_level =586586+ pt_pgsz_lg2_to_level(range->common, map->leaf_pgsize_lg2);587587+ map->num_leaves = 
pt_pgsz_count(iommu_table->domain.pgsize_bitmap,588588+ last_va, range->last_va, oa,589589+ map->leaf_pgsize_lg2);590590+591591+ /* Didn't finish this table level, caller will repeat it */592592+ if (pts.index != orig_end) {593593+ if (pts.index != start_index)594594+ pt_index_to_va(&pts);595595+ return -EAGAIN;596596+ }597597+ return 0;590598}591599592600static int __map_range(struct pt_range *range, void *arg, unsigned int level,···642584 if (pts.type != PT_ENTRY_EMPTY)643585 return -EADDRINUSE;644586 ret = pt_iommu_new_table(&pts, &map->attrs);645645- if (ret) {646646- /*647647- * Racing with another thread installing a table648648- */649649- if (ret == -EAGAIN)650650- continue;587587+ /* EAGAIN on a race will loop again */588588+ if (ret)651589 return ret;652652- }653590 } else {654591 pts.table_lower = pt_table_ptr(&pts);655592 /*···668615 * The already present table can possibly be shared with another669616 * concurrent map.670617 */671671- if (map->leaf_level == level - 1)672672- ret = pt_descend(&pts, arg, __map_range_leaf);673673- else674674- ret = pt_descend(&pts, arg, __map_range);618618+ do {619619+ if (map->leaf_level == level - 1)620620+ ret = pt_descend(&pts, arg, __map_range_leaf);621621+ else622622+ ret = pt_descend(&pts, arg, __map_range);623623+ } while (ret == -EAGAIN);675624 if (ret)676625 return ret;677626···681626 pt_index_to_va(&pts);682627 if (pts.index >= pts.end_index)683628 break;629629+630630+ /*631631+ * This level is currently running __map_range_leaf() which is632632+ * not correct if the target level has been updated to this633633+ * level. Have the caller invoke __map_range_leaf.634634+ */635635+ if (map->leaf_level == level)636636+ return -EAGAIN;684637 } while (true);685638 return 0;686639}···860797static int do_map(struct pt_range *range, struct pt_common *common,861798 bool single_page, struct pt_iommu_map_args *map)862799{800800+ int ret;801801+863802 /*864803 * The __map_single_page() fast path does not support DMA_INCOHERENT865804 * flushing to keep its .text small.866805 */867806 if (single_page && !pt_feature(common, PT_FEAT_DMA_INCOHERENT)) {868868- int ret;869807870808 ret = pt_walk_range(range, __map_single_page, map);871809 if (ret != -EAGAIN)···874810 /* EAGAIN falls through to the full path */875811 }876812877877- if (map->leaf_level == range->top_level)878878- return pt_walk_range(range, __map_range_leaf, map);879879- return pt_walk_range(range, __map_range, map);813813+ do {814814+ if (map->leaf_level == range->top_level)815815+ ret = pt_walk_range(range, __map_range_leaf, map);816816+ else817817+ ret = pt_walk_range(range, __map_range, map);818818+ } while (ret == -EAGAIN);819819+ return ret;880820}881821882882-/**883883- * map_pages() - Install translation for an IOVA range884884- * @domain: Domain to manipulate885885- * @iova: IO virtual address to start886886- * @paddr: Physical/Output address to start887887- * @pgsize: Length of each page888888- * @pgcount: Length of the range in pgsize units starting from @iova889889- * @prot: A bitmap of IOMMU_READ/WRITE/CACHE/NOEXEC/MMIO890890- * @gfp: GFP flags for any memory allocations891891- * @mapped: Total bytes successfully mapped892892- *893893- * The range starting at IOVA will have paddr installed into it. 
The caller894894- * must specify a valid pgsize and pgcount to segment the range into compatible895895- * blocks.896896- *897897- * On error the caller will probably want to invoke unmap on the range from iova898898- * up to the amount indicated by @mapped to return the table back to an899899- * unchanged state.900900- *901901- * Context: The caller must hold a write range lock that includes the whole902902- * range.903903- *904904- * Returns: -ERRNO on failure, 0 on success. The number of bytes of VA that were905905- * mapped are added to @mapped, @mapped is not zerod first.906906- */907907-int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,908908- phys_addr_t paddr, size_t pgsize, size_t pgcount,909909- int prot, gfp_t gfp, size_t *mapped)822822+static int NS(map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,823823+ phys_addr_t paddr, dma_addr_t len, unsigned int prot,824824+ gfp_t gfp, size_t *mapped)910825{911911- struct pt_iommu *iommu_table =912912- container_of(domain, struct pt_iommu, domain);913826 pt_vaddr_t pgsize_bitmap = iommu_table->domain.pgsize_bitmap;914827 struct pt_common *common = common_from_iommu(iommu_table);915828 struct iommu_iotlb_gather iotlb_gather;916916- pt_vaddr_t len = pgsize * pgcount;917829 struct pt_iommu_map_args map = {918830 .iotlb_gather = &iotlb_gather,919831 .oa = paddr,920920- .leaf_pgsize_lg2 = vaffs(pgsize),921832 };922833 bool single_page = false;923834 struct pt_range range;···920881 return ret;921882922883 /* Calculate target page size and level for the leaves */923923- if (pt_has_system_page_size(common) && pgsize == PAGE_SIZE &&924924- pgcount == 1) {884884+ if (pt_has_system_page_size(common) && len == PAGE_SIZE) {925885 PT_WARN_ON(!(pgsize_bitmap & PAGE_SIZE));926886 if (log2_mod(iova | paddr, PAGE_SHIFT))927887 return -ENXIO;928888 map.leaf_pgsize_lg2 = PAGE_SHIFT;929889 map.leaf_level = 0;890890+ map.num_leaves = 1;930891 single_page = true;931892 } else {932893 map.leaf_pgsize_lg2 = pt_compute_best_pgsize(···935896 return -ENXIO;936897 map.leaf_level =937898 pt_pgsz_lg2_to_level(common, map.leaf_pgsize_lg2);899899+ map.num_leaves = pt_pgsz_count(pgsize_bitmap, range.va,900900+ range.last_va, paddr,901901+ map.leaf_pgsize_lg2);938902 }939903940904 ret = check_map_range(iommu_table, &range, &map);···960918 *mapped += map.oa - paddr;961919 return ret;962920}963963-EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(map_pages), "GENERIC_PT_IOMMU");964921965922struct pt_unmap_args {966923 struct iommu_pages_list free_list;···10611020 return ret;10621021}1063102210641064-/**10651065- * unmap_pages() - Make a range of IOVA empty/not present10661066- * @domain: Domain to manipulate10671067- * @iova: IO virtual address to start10681068- * @pgsize: Length of each page10691069- * @pgcount: Length of the range in pgsize units starting from @iova10701070- * @iotlb_gather: Gather struct that must be flushed on return10711071- *10721072- * unmap_pages() will remove a translation created by map_pages(). It cannot10731073- * subdivide a mapping created by map_pages(), so it should be called with IOVA10741074- * ranges that match those passed to map_pages(). The IOVA range can aggregate10751075- * contiguous map_pages() calls so long as no individual range is split.10761076- *10771077- * Context: The caller must hold a write range lock that includes10781078- * the whole range.10791079- *10801080- * Returns: Number of bytes of VA unmapped. 
iova + res will be the point10811081- * unmapping stopped.10821082- */10831083-size_t DOMAIN_NS(unmap_pages)(struct iommu_domain *domain, unsigned long iova,10841084- size_t pgsize, size_t pgcount,10231023+static size_t NS(unmap_range)(struct pt_iommu *iommu_table, dma_addr_t iova,10241024+ dma_addr_t len,10851025 struct iommu_iotlb_gather *iotlb_gather)10861026{10871087- struct pt_iommu *iommu_table =10881088- container_of(domain, struct pt_iommu, domain);10891027 struct pt_unmap_args unmap = { .free_list = IOMMU_PAGES_LIST_INIT(10901028 unmap.free_list) };10911091- pt_vaddr_t len = pgsize * pgcount;10921029 struct pt_range range;10931030 int ret;10941031···1076105710771058 pt_walk_range(&range, __unmap_range, &unmap);1078105910791079- gather_range_pages(iotlb_gather, iommu_table, iova, len,10601060+ gather_range_pages(iotlb_gather, iommu_table, iova, unmap.unmapped,10801061 &unmap.free_list);1081106210821063 return unmap.unmapped;10831064}10841084-EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(unmap_pages), "GENERIC_PT_IOMMU");1085106510861066static void NS(get_info)(struct pt_iommu *iommu_table,10871067 struct pt_iommu_info *info)···11281110}1129111111301112static const struct pt_iommu_ops NS(ops) = {11131113+ .map_range = NS(map_range),11141114+ .unmap_range = NS(unmap_range),11311115#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER) && defined(pt_entry_is_write_dirty) && \11321116 IS_ENABLED(CONFIG_IOMMUFD_TEST) && defined(pt_entry_make_write_dirty)11331117 .set_dirty = NS(set_dirty),···1192117211931173 domain->type = __IOMMU_DOMAIN_PAGING;11941174 domain->pgsize_bitmap = info.pgsize_bitmap;11751175+ domain->is_iommupt = true;1195117611961177 if (pt_feature(common, PT_FEAT_DYNAMIC_TOP))11971178 range = _pt_top_range(common,
···569569 return pgsz_lg2;570570}571571572572+/*573573+ * Return the number of pgsize_lg2 leaf entries that can be mapped for574574+ * va to oa. This accounts for any requirement to reduce or increase the page575575+ * size across the VA range.576576+ */577577+static inline pt_vaddr_t pt_pgsz_count(pt_vaddr_t pgsz_bitmap, pt_vaddr_t va,578578+ pt_vaddr_t last_va, pt_oaddr_t oa,579579+ unsigned int pgsize_lg2)580580+{581581+ pt_vaddr_t len = last_va - va + 1;582582+ pt_vaddr_t next_pgsizes = log2_set_mod(pgsz_bitmap, 0, pgsize_lg2 + 1);583583+584584+ if (next_pgsizes) {585585+ unsigned int next_pgsize_lg2 = vaffs(next_pgsizes);586586+587587+ if (log2_mod(va ^ oa, next_pgsize_lg2) == 0)588588+ len = min(len, log2_set_mod_max(va, next_pgsize_lg2) -589589+ va + 1);590590+ }591591+ return log2_div(len, pgsize_lg2);592592+}593593+572594#define _PT_MAKE_CALL_LEVEL(fn) \573595 static __always_inline int fn(struct pt_range *range, void *arg, \574596 unsigned int level, \
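For a concrete feel of the arithmetic above, the following standalone userspace sketch mirrors the leaf-counting idea: count_leaves() is a simplified stand-in for pt_pgsz_count() (the log2_* helpers are replaced with plain bit operations, an assumption of this sketch rather than the kernel code). Mapping 4 MiB at an IOVA/OA pair that is 4 KiB aligned but not 2 MiB aligned yields 511 4 KiB leaves before the first 2 MiB boundary, after which larger pages take over.

/*
 * Simplified stand-in for pt_pgsz_count(): starting at @va mapping to
 * @oa, how many @pgsize_lg2 leaves fit before a larger supported page
 * size could take over?
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t count_leaves(uint64_t pgsz_bitmap, uint64_t va,
			     uint64_t last_va, uint64_t oa,
			     unsigned int pgsize_lg2)
{
	uint64_t len = last_va - va + 1;
	/* Supported page sizes strictly larger than the current one */
	uint64_t bigger = pgsz_bitmap & ~((2ull << pgsize_lg2) - 1);

	if (bigger) {
		unsigned int next_lg2 = __builtin_ctzll(bigger);

		/* Stop early only if va and oa are co-aligned to it */
		if (((va ^ oa) & ((1ull << next_lg2) - 1)) == 0) {
			uint64_t boundary = (va | ((1ull << next_lg2) - 1)) + 1;

			if (boundary - va < len)
				len = boundary - va;
		}
	}
	return len >> pgsize_lg2;
}

int main(void)
{
	/* 4K | 2M | 1G supported; map 4 MiB at IOVA 0x1000 -> OA 0x1000 */
	uint64_t bitmap = (1ull << 12) | (1ull << 21) | (1ull << 30);
	uint64_t n = count_leaves(bitmap, 0x1000, 0x1000 + 0x400000 - 1,
				  0x1000, 12);

	/* 511: 4K leaves up to the first 2M boundary, then 2M pages */
	printf("4K leaves before the 2M boundary: %llu\n",
	       (unsigned long long)n);
	return 0;
}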
+26-27
drivers/iommu/intel/cache.c
···255255256256static unsigned long calculate_psi_aligned_address(unsigned long start,257257 unsigned long end,258258- unsigned long *_pages,259258 unsigned long *_mask)260259{261260 unsigned long pages = aligned_nrpages(start, end - start + 1);···280281 */281282 shared_bits = ~(pfn ^ end_pfn) & ~bitmask;282283 mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;283283- aligned_pages = 1UL << mask;284284 }285285286286- *_pages = aligned_pages;287286 *_mask = mask;288287289288 return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);···327330 qi_batch_increment_index(iommu, batch);328331}329332333333+static void qi_batch_add_piotlb_all(struct intel_iommu *iommu, u16 did,334334+ u32 pasid, struct qi_batch *batch)335335+{336336+ qi_desc_piotlb_all(did, pasid, &batch->descs[batch->index]);337337+ qi_batch_increment_index(iommu, batch);338338+}339339+330340static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,331331- u64 addr, unsigned long npages, bool ih,341341+ u64 addr, unsigned int size_order, bool ih,332342 struct qi_batch *batch)333343{334334- /*335335- * npages == -1 means a PASID-selective invalidation, otherwise,336336- * a positive value for Page-selective-within-PASID invalidation.337337- * 0 is not a valid input.338338- */339339- if (!npages)340340- return;341341-342342- qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);344344+ qi_desc_piotlb(did, pasid, addr, size_order, ih,345345+ &batch->descs[batch->index]);343346 qi_batch_increment_index(iommu, batch);344347}345348···368371}369372370373static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,371371- unsigned long addr, unsigned long pages,372372- unsigned long mask, int ih)374374+ unsigned long addr, unsigned long mask, int ih)373375{374376 struct intel_iommu *iommu = tag->iommu;375377 u64 type = DMA_TLB_PSI_FLUSH;376378377379 if (intel_domain_use_piotlb(domain)) {378378- qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,379379- pages, ih, domain->qi_batch);380380+ if (mask >= MAX_AGAW_PFN_WIDTH)381381+ qi_batch_add_piotlb_all(iommu, tag->domain_id,382382+ tag->pasid, domain->qi_batch);383383+ else384384+ qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid,385385+ addr, mask, ih, domain->qi_batch);380386 return;381387 }382388···388388 * is too big.389389 */390390 if (!cap_pgsel_inv(iommu->cap) ||391391- mask > cap_max_amask_val(iommu->cap) || pages == -1) {391391+ mask > cap_max_amask_val(iommu->cap)) {392392 addr = 0;393393 mask = 0;394394 ih = 0;···437437 unsigned long end, int ih)438438{439439 struct intel_iommu *iommu = NULL;440440- unsigned long pages, mask, addr;440440+ unsigned long mask, addr;441441 struct cache_tag *tag;442442 unsigned long flags;443443444444 if (start == 0 && end == ULONG_MAX) {445445 addr = 0;446446- pages = -1;447446 mask = MAX_AGAW_PFN_WIDTH;448447 } else {449449- addr = calculate_psi_aligned_address(start, end, &pages, &mask);448448+ addr = calculate_psi_aligned_address(start, end, &mask);450449 }451450452451 spin_lock_irqsave(&domain->cache_lock, flags);···457458 switch (tag->type) {458459 case CACHE_TAG_IOTLB:459460 case CACHE_TAG_NESTING_IOTLB:460460- cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);461461+ cache_tag_flush_iotlb(domain, tag, addr, mask, ih);461462 break;462463 case CACHE_TAG_NESTING_DEVTLB:463464 /*···475476 break;476477 }477478478478- trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);479479+ trace_cache_tag_flush_range(tag, start, end, addr, mask);479480 }480481 
qi_batch_flush_descs(iommu, domain->qi_batch);481482 spin_unlock_irqrestore(&domain->cache_lock, flags);···505506 unsigned long end)506507{507508 struct intel_iommu *iommu = NULL;508508- unsigned long pages, mask, addr;509509+ unsigned long mask, addr;509510 struct cache_tag *tag;510511 unsigned long flags;511512512512- addr = calculate_psi_aligned_address(start, end, &pages, &mask);513513+ addr = calculate_psi_aligned_address(start, end, &mask);513514514515 spin_lock_irqsave(&domain->cache_lock, flags);515516 list_for_each_entry(tag, &domain->cache_tags, node) {···525526526527 if (tag->type == CACHE_TAG_IOTLB ||527528 tag->type == CACHE_TAG_NESTING_IOTLB)528528- cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);529529+ cache_tag_flush_iotlb(domain, tag, addr, mask, 0);529530530530- trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);531531+ trace_cache_tag_flush_range_np(tag, start, end, addr, mask);531532 }532533 qi_batch_flush_descs(iommu, domain->qi_batch);533534 spin_unlock_irqrestore(&domain->cache_lock, flags);
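To make the mask-only interface above concrete, here is a small userspace sketch of the address-mask computation performed by calculate_psi_aligned_address(); psi_mask() and MAX_WIDTH are simplified stand-ins (MAX_WIDTH approximating MAX_AGAW_PFN_WIDTH), showing how an unaligned 4-page range is widened to an 8-page invalidation mask so one PSI descriptor still covers it.

/*
 * Simplified stand-in for the PSI mask math: if the range is not
 * naturally aligned, widen the mask to the smallest power-of-two
 * region that still covers both the first and the last PFN.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_WIDTH 57	/* stand-in for MAX_AGAW_PFN_WIDTH */

static unsigned int psi_mask(uint64_t pfn, uint64_t nr_pages)
{
	/* round nr_pages up to a power of two, mask = log2 of that */
	unsigned int mask = nr_pages <= 1 ? 0 :
			    64 - __builtin_clzll(nr_pages - 1);
	uint64_t aligned = 1ull << mask;

	if (pfn & (aligned - 1)) {
		uint64_t end_pfn = pfn + nr_pages - 1;
		/* high bits that the first and last PFN still share */
		uint64_t shared = ~(pfn ^ end_pfn) & ~(aligned - 1);

		mask = shared ? __builtin_ctzll(shared) : MAX_WIDTH;
	}
	return mask;
}

int main(void)
{
	/* 4 pages starting at PFN 0x1003: not 4-page aligned */
	unsigned int mask = psi_mask(0x1003, 4);

	printf("mask = %u -> invalidation covers %lu pages\n",
	       mask, 1ul << mask);
	return 0;
}

With the mask now the only output, a result of MAX_AGAW_PFN_WIDTH or more is what makes cache_tag_flush_iotlb() above fall back to the PASID-selective flush-all invalidation instead of a page-selective one.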
+9-9
drivers/iommu/intel/debugfs.c
···133133 */134134 raw_spin_lock_irqsave(&iommu->register_lock, flag);135135 for (i = 0 ; i < ARRAY_SIZE(iommu_regs_32); i++) {136136- value = dmar_readl(iommu->reg + iommu_regs_32[i].offset);136136+ value = readl(iommu->reg + iommu_regs_32[i].offset);137137 seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",138138 iommu_regs_32[i].regs, iommu_regs_32[i].offset,139139 value);140140 }141141 for (i = 0 ; i < ARRAY_SIZE(iommu_regs_64); i++) {142142- value = dmar_readq(iommu->reg + iommu_regs_64[i].offset);142142+ value = readq(iommu->reg + iommu_regs_64[i].offset);143143 seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",144144 iommu_regs_64[i].regs, iommu_regs_64[i].offset,145145 value);···247247 tbl_wlk.ctx_entry = context;248248 m->private = &tbl_wlk;249249250250- if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT) {250250+ if (readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT) {251251 pasid_dir_ptr = context->lo & VTD_PAGE_MASK;252252 pasid_dir_size = get_pasid_dir_size(context);253253 pasid_dir_walk(m, pasid_dir_ptr, pasid_dir_size);···285285286286 rcu_read_lock();287287 for_each_active_iommu(iommu, drhd) {288288- sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);288288+ sts = readl(iommu->reg + DMAR_GSTS_REG);289289 if (!(sts & DMA_GSTS_TES)) {290290 seq_printf(m, "DMA Remapping is not enabled on %s\n",291291 iommu->name);···364364 if (seg != iommu->segment)365365 continue;366366367367- sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);367367+ sts = readl(iommu->reg + DMAR_GSTS_REG);368368 if (!(sts & DMA_GSTS_TES)) {369369 seq_printf(m, "DMA Remapping is not enabled on %s\n",370370 iommu->name);371371 continue;372372 }373373- if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT)373373+ if (readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT)374374 scalable = true;375375 else376376 scalable = false;···538538 raw_spin_lock_irqsave(&qi->q_lock, flags);539539 seq_printf(m, " Base: 0x%llx\tHead: %lld\tTail: %lld\n",540540 (u64)virt_to_phys(qi->desc),541541- dmar_readq(iommu->reg + DMAR_IQH_REG) >> shift,542542- dmar_readq(iommu->reg + DMAR_IQT_REG) >> shift);541541+ readq(iommu->reg + DMAR_IQH_REG) >> shift,542542+ readq(iommu->reg + DMAR_IQT_REG) >> shift);543543 invalidation_queue_entry_show(m, iommu);544544 raw_spin_unlock_irqrestore(&qi->q_lock, flags);545545 seq_putc(m, '\n');···620620 seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n",621621 iommu->name);622622623623- sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);623623+ sts = readl(iommu->reg + DMAR_GSTS_REG);624624 if (iommu->ir_table && (sts & DMA_GSTS_IRES)) {625625 irta = virt_to_phys(iommu->ir_table->base);626626 seq_printf(m, " IR table address:%llx\n", irta);
+16-27
drivers/iommu/intel/dmar.c
···899899 return -EINVAL;900900 }901901902902- cap = dmar_readq(addr + DMAR_CAP_REG);903903- ecap = dmar_readq(addr + DMAR_ECAP_REG);902902+ cap = readq(addr + DMAR_CAP_REG);903903+ ecap = readq(addr + DMAR_ECAP_REG);904904905905 if (arg)906906 iounmap(addr);···982982 goto release;983983 }984984985985- iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);986986- iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);985985+ iommu->cap = readq(iommu->reg + DMAR_CAP_REG);986986+ iommu->ecap = readq(iommu->reg + DMAR_ECAP_REG);987987988988 if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {989989 err = -EINVAL;···10171017 int i;1018101810191019 for (i = 0; i < DMA_MAX_NUM_ECMDCAP; i++) {10201020- iommu->ecmdcap[i] = dmar_readq(iommu->reg + DMAR_ECCAP_REG +10211021- i * DMA_ECMD_REG_STEP);10201020+ iommu->ecmdcap[i] = readq(iommu->reg + DMAR_ECCAP_REG +10211021+ i * DMA_ECMD_REG_STEP);10221022 }10231023 }10241024···1239123912401240static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)12411241{12421242- unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);12431243- u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);12421242+ unsigned int head = readl(iommu->reg + DMAR_IQH_REG);12431243+ u64 iqe_err = readq(iommu->reg + DMAR_IQER_REG);12441244 struct qi_desc *desc = iommu->qi->desc + head;1245124512461246 if (fault & DMA_FSTS_IQE)···13211321 * SID field is valid only when the ITE field is Set in FSTS_REG13221322 * see Intel VT-d spec r4.1, section 11.4.9.913231323 */13241324- iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);13241324+ iqe_err = readq(iommu->reg + DMAR_IQER_REG);13251325 ite_sid = DMAR_IQER_REG_ITESID(iqe_err);1326132613271327 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);···15501550 qi_submit_sync(iommu, &desc, 1, 0);15511551}1552155215531553-/* PASID-based IOTLB invalidation */15541554-void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,15551555- unsigned long npages, bool ih)15531553+/* PASID-selective IOTLB invalidation */15541554+void qi_flush_piotlb_all(struct intel_iommu *iommu, u16 did, u32 pasid)15561555{15571557- struct qi_desc desc = {.qw2 = 0, .qw3 = 0};15561556+ struct qi_desc desc = {};1558155715591559- /*15601560- * npages == -1 means a PASID-selective invalidation, otherwise,15611561- * a positive value for Page-selective-within-PASID invalidation.15621562- * 0 is not a valid input.15631563- */15641564- if (WARN_ON(!npages)) {15651565- pr_err("Invalid input npages = %ld\n", npages);15661566- return;15671567- }15681568-15691569- qi_desc_piotlb(did, pasid, addr, npages, ih, &desc);15581558+ qi_desc_piotlb_all(did, pasid, &desc);15701559 qi_submit_sync(iommu, &desc, 1, 0);15711560}15721561···16501661 /* write zero to the tail reg */16511662 writel(0, iommu->reg + DMAR_IQT_REG);1652166316531653- dmar_writeq(iommu->reg + DMAR_IQA_REG, val);16641664+ writeq(val, iommu->reg + DMAR_IQA_REG);1654166516551666 iommu->gcmd |= DMA_GCMD_QIE;16561667 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);···19691980 source_id = dma_frcd_source_id(data);1970198119711982 pasid_present = dma_frcd_pasid_present(data);19721972- guest_addr = dmar_readq(iommu->reg + reg +19731973- fault_index * PRIMARY_FAULT_REG_LEN);19831983+ guest_addr = readq(iommu->reg + reg +19841984+ fault_index * PRIMARY_FAULT_REG_LEN);19741985 guest_addr = dma_frcd_page_addr(guest_addr);19751986 }19761987
+31-27
drivers/iommu/intel/iommu.c
···697697 addr |= DMA_RTADDR_SMT;698698699699 raw_spin_lock_irqsave(&iommu->register_lock, flag);700700- dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);700700+ writeq(addr, iommu->reg + DMAR_RTADDR_REG);701701702702 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);703703···765765 val |= DMA_CCMD_ICC;766766767767 raw_spin_lock_irqsave(&iommu->register_lock, flag);768768- dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);768768+ writeq(val, iommu->reg + DMAR_CCMD_REG);769769770770 /* Make sure hardware complete it */771771 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,772772- dmar_readq, (!(val & DMA_CCMD_ICC)), val);772772+ readq, (!(val & DMA_CCMD_ICC)), val);773773774774 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);775775}···806806 raw_spin_lock_irqsave(&iommu->register_lock, flag);807807 /* Note: Only uses first TLB reg currently */808808 if (val_iva)809809- dmar_writeq(iommu->reg + tlb_offset, val_iva);810810- dmar_writeq(iommu->reg + tlb_offset + 8, val);809809+ writeq(val_iva, iommu->reg + tlb_offset);810810+ writeq(val, iommu->reg + tlb_offset + 8);811811812812 /* Make sure hardware complete it */813813 IOMMU_WAIT_OP(iommu, tlb_offset + 8,814814- dmar_readq, (!(val & DMA_TLB_IVT)), val);814814+ readq, (!(val & DMA_TLB_IVT)), val);815815816816 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);817817···15331533 int bus, ret;15341534 bool new_ext, ext;1535153515361536- rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);15361536+ rtaddr_reg = readq(iommu->reg + DMAR_RTADDR_REG);15371537 ext = !!(rtaddr_reg & DMA_RTADDR_SMT);15381538 new_ext = !!sm_supported(iommu);15391539···3212321232133213 switch (cap) {32143214 case IOMMU_CAP_CACHE_COHERENCY:32153215- case IOMMU_CAP_DEFERRED_FLUSH:32163215 return true;32173216 case IOMMU_CAP_PRE_BOOT_PROTECTION:32183217 return dmar_platform_optin();···32193220 return ecap_sc_support(info->iommu->ecap);32203221 case IOMMU_CAP_DIRTY_TRACKING:32213222 return ssads_supported(info->iommu);32233223+ case IOMMU_CAP_PCI_ATS_SUPPORTED:32243224+ return info->ats_supported;32223225 default:32233226 return false;32243227 }···36193618 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))36203619 return -EOPNOTSUPP;3621362036223622- if (domain->dirty_ops)36233623- return -EINVAL;36243624-36253621 if (context_copied(iommu, info->bus, info->devfn))36263622 return -EBUSY;36273623···36823684 return vtd;36833685}3684368636853685-/*36863686- * Set dirty tracking for the device list of a domain. The caller must36873687- * hold the domain->lock when calling it.36883688- */36893689-static int device_set_dirty_tracking(struct list_head *devices, bool enable)36873687+/* Set dirty tracking for the devices that the domain has been attached. 
*/36883688+static int domain_set_dirty_tracking(struct dmar_domain *domain, bool enable)36903689{36913690 struct device_domain_info *info;36913691+ struct dev_pasid_info *dev_pasid;36923692 int ret = 0;3693369336943694- list_for_each_entry(info, devices, link) {36943694+ lockdep_assert_held(&domain->lock);36953695+36963696+ list_for_each_entry(info, &domain->devices, link) {36953697 ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,36963698 IOMMU_NO_PASID, enable);36993699+ if (ret)37003700+ return ret;37013701+ }37023702+37033703+ list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {37043704+ info = dev_iommu_priv_get(dev_pasid->dev);37053705+ ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,37063706+ dev_pasid->pasid, enable);36973707 if (ret)36983708 break;36993709 }···37193713 spin_lock(&domain->s1_lock);37203714 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {37213715 spin_lock_irqsave(&s1_domain->lock, flags);37223722- ret = device_set_dirty_tracking(&s1_domain->devices, enable);37163716+ ret = domain_set_dirty_tracking(s1_domain, enable);37233717 spin_unlock_irqrestore(&s1_domain->lock, flags);37243718 if (ret)37253719 goto err_unwind;···37303724err_unwind:37313725 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {37323726 spin_lock_irqsave(&s1_domain->lock, flags);37333733- device_set_dirty_tracking(&s1_domain->devices,37343734- domain->dirty_tracking);37273727+ domain_set_dirty_tracking(s1_domain, domain->dirty_tracking);37353728 spin_unlock_irqrestore(&s1_domain->lock, flags);37363729 }37373730 spin_unlock(&domain->s1_lock);···37473742 if (dmar_domain->dirty_tracking == enable)37483743 goto out_unlock;3749374437503750- ret = device_set_dirty_tracking(&dmar_domain->devices, enable);37453745+ ret = domain_set_dirty_tracking(dmar_domain, enable);37513746 if (ret)37523747 goto err_unwind;37533748···37643759 return 0;3765376037663761err_unwind:37673767- device_set_dirty_tracking(&dmar_domain->devices,37683768- dmar_domain->dirty_tracking);37623762+ domain_set_dirty_tracking(dmar_domain, dmar_domain->dirty_tracking);37693763 spin_unlock(&dmar_domain->lock);37703764 return ret;37713765}···4189418541904186 raw_spin_lock_irqsave(&iommu->register_lock, flags);4191418741924192- res = dmar_readq(iommu->reg + DMAR_ECRSP_REG);41884188+ res = readq(iommu->reg + DMAR_ECRSP_REG);41934189 if (res & DMA_ECMD_ECRSP_IP) {41944190 ret = -EBUSY;41954191 goto err;···42024198 * - It's not invoked in any critical path. The extra MMIO42034199 * write doesn't bring any performance concerns.42044200 */42054205- dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob);42064206- dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT));42014201+ writeq(ob, iommu->reg + DMAR_ECEO_REG);42024202+ writeq(ecmd | (oa << DMA_ECMD_OA_SHIFT), iommu->reg + DMAR_ECMD_REG);4207420342084208- IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq,42044204+ IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, readq,42094205 !(res & DMA_ECMD_ECRSP_IP), res);4210420642114207 if (res & DMA_ECMD_ECRSP_IP) {
···132132133133DECLARE_EVENT_CLASS(cache_tag_flush,134134 TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end,135135- unsigned long addr, unsigned long pages, unsigned long mask),136136- TP_ARGS(tag, start, end, addr, pages, mask),135135+ unsigned long addr, unsigned long mask),136136+ TP_ARGS(tag, start, end, addr, mask),137137 TP_STRUCT__entry(138138 __string(iommu, tag->iommu->name)139139 __string(dev, dev_name(tag->dev))···143143 __field(unsigned long, start)144144 __field(unsigned long, end)145145 __field(unsigned long, addr)146146- __field(unsigned long, pages)147146 __field(unsigned long, mask)148147 ),149148 TP_fast_assign(···154155 __entry->start = start;155156 __entry->end = end;156157 __entry->addr = addr;157157- __entry->pages = pages;158158 __entry->mask = mask;159159 ),160160- TP_printk("%s %s[%d] type %s did %d [0x%lx-0x%lx] addr 0x%lx pages 0x%lx mask 0x%lx",160160+ TP_printk("%s %s[%d] type %s did %d [0x%lx-0x%lx] addr 0x%lx mask 0x%lx",161161 __get_str(iommu), __get_str(dev), __entry->pasid,162162 __print_symbolic(__entry->type,163163 { CACHE_TAG_IOTLB, "iotlb" },···164166 { CACHE_TAG_NESTING_IOTLB, "nesting_iotlb" },165167 { CACHE_TAG_NESTING_DEVTLB, "nesting_devtlb" }),166168 __entry->domain_id, __entry->start, __entry->end,167167- __entry->addr, __entry->pages, __entry->mask169169+ __entry->addr, __entry->mask168170 )169171);170172171173DEFINE_EVENT(cache_tag_flush, cache_tag_flush_range,172174 TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end,173173- unsigned long addr, unsigned long pages, unsigned long mask),174174- TP_ARGS(tag, start, end, addr, pages, mask)175175+ unsigned long addr, unsigned long mask),176176+ TP_ARGS(tag, start, end, addr, mask)175177);176178177179DEFINE_EVENT(cache_tag_flush, cache_tag_flush_range_np,178180 TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end,179179- unsigned long addr, unsigned long pages, unsigned long mask),180180- TP_ARGS(tag, start, end, addr, pages, mask)181181+ unsigned long addr, unsigned long mask),182182+ TP_ARGS(tag, start, end, addr, mask)181183);182184#endif /* _TRACE_INTEL_IOMMU_H */183185
+58-14
drivers/iommu/iommu.c
···3434#include <linux/sched/mm.h>3535#include <linux/msi.h>3636#include <uapi/linux/iommufd.h>3737+#include <linux/generic_pt/iommu.h>37383839#include "dma-iommu.h"3940#include "iommu-priv.h"···25732572 return pgsize;25742573}2575257425762576-int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,25772577- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)25752575+static int __iommu_map_domain_pgtbl(struct iommu_domain *domain,25762576+ unsigned long iova, phys_addr_t paddr,25772577+ size_t size, int prot, gfp_t gfp)25782578{25792579 const struct iommu_domain_ops *ops = domain->ops;25802580 unsigned long orig_iova = iova;25812581 unsigned int min_pagesz;25822582 size_t orig_size = size;25832583- phys_addr_t orig_paddr = paddr;25842583 int ret = 0;2585258425862585 might_sleep_if(gfpflags_allow_blocking(gfp));···26372636 /* unroll mapping in case something went wrong */26382637 if (ret) {26392638 iommu_unmap(domain, orig_iova, orig_size - size);26402640- } else {26412641- trace_map(orig_iova, orig_paddr, orig_size);26422642- iommu_debug_map(domain, orig_paddr, orig_size);26392639+ return ret;26432640 }26442644-26452645- return ret;26412641+ return 0;26462642}2647264326482644int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size)···26492651 if (!ops->iotlb_sync_map)26502652 return 0;26512653 return ops->iotlb_sync_map(domain, iova, size);26542654+}26552655+26562656+int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,26572657+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)26582658+{26592659+ struct pt_iommu *pt = iommupt_from_domain(domain);26602660+ int ret;26612661+26622662+ if (pt) {26632663+ size_t mapped = 0;26642664+26652665+ ret = pt->ops->map_range(pt, iova, paddr, size, prot, gfp,26662666+ &mapped);26672667+ if (ret) {26682668+ iommu_unmap(domain, iova, mapped);26692669+ return ret;26702670+ }26712671+ return 0;26722672+ }26732673+ ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot, gfp);26742674+ if (ret)26752675+ return ret;26762676+26772677+ trace_map(iova, paddr, size);26782678+ iommu_debug_map(domain, paddr, size);26792679+ return 0;26522680}2653268126542682int iommu_map(struct iommu_domain *domain, unsigned long iova,···26942670}26952671EXPORT_SYMBOL_GPL(iommu_map);2696267226972697-static size_t __iommu_unmap(struct iommu_domain *domain,26982698- unsigned long iova, size_t size,26992699- struct iommu_iotlb_gather *iotlb_gather)26732673+static size_t26742674+__iommu_unmap_domain_pgtbl(struct iommu_domain *domain, unsigned long iova,26752675+ size_t size, struct iommu_iotlb_gather *iotlb_gather)27002676{27012677 const struct iommu_domain_ops *ops = domain->ops;27022678 size_t unmapped_page, unmapped = 0;27032703- unsigned long orig_iova = iova;27042679 unsigned int min_pagesz;2705268027062681 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))···2740271727412718 pr_debug("unmapped: iova 0x%lx size 0x%zx\n",27422719 iova, unmapped_page);27202720+ /*27212721+ * If the driver itself isn't using the gather, make sure27222722+ * it looks non-empty so iotlb_sync will still be called.27232723+ */27242724+ if (iotlb_gather->start >= iotlb_gather->end)27252725+ iommu_iotlb_gather_add_range(iotlb_gather, iova, size);2743272627442727 iova += unmapped_page;27452728 unmapped += unmapped_page;27462729 }2747273027482748- trace_unmap(orig_iova, size, unmapped);27492749- iommu_debug_unmap_end(domain, orig_iova, size, unmapped);27312731+ return unmapped;27322732+}27332733+27342734+static size_t __iommu_unmap(struct 
iommu_domain *domain, unsigned long iova,27352735+ size_t size,27362736+ struct iommu_iotlb_gather *iotlb_gather)27372737+{27382738+ struct pt_iommu *pt = iommupt_from_domain(domain);27392739+ size_t unmapped;27402740+27412741+ if (pt)27422742+ unmapped = pt->ops->unmap_range(pt, iova, size, iotlb_gather);27432743+ else27442744+ unmapped = __iommu_unmap_domain_pgtbl(domain, iova, size,27452745+ iotlb_gather);27462746+ trace_unmap(iova, size, unmapped);27472747+ iommu_debug_unmap_end(domain, iova, size, unmapped);27502748 return unmapped;27512749}27522750
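The gather handling above, together with the iommu_iotlb_sync() change in include/linux/iommu.h further down, relies on the convention that a freshly initialized iommu_iotlb_gather has start = ULONG_MAX and end = 0, so start >= end reads as "nothing queued". A minimal userspace sketch of that convention, with simplified stand-ins for the gather helpers:

/*
 * gather_init()/gather_add_range() are simplified stand-ins for
 * iommu_iotlb_gather_init()/iommu_iotlb_gather_add_range(): adding a
 * range makes the gather look non-empty so a later iotlb_sync runs.
 */
#include <stdio.h>
#include <limits.h>

struct gather {
	unsigned long start;
	unsigned long end;
};

static void gather_init(struct gather *g)
{
	g->start = ULONG_MAX;
	g->end = 0;
}

static void gather_add_range(struct gather *g, unsigned long iova,
			     unsigned long size)
{
	unsigned long end = iova + size - 1;

	if (iova < g->start)
		g->start = iova;
	if (end > g->end)
		g->end = end;
}

static int gather_is_empty(const struct gather *g)
{
	return g->start >= g->end;
}

int main(void)
{
	struct gather g;

	gather_init(&g);
	printf("empty after init: %d\n", gather_is_empty(&g));	/* 1 */

	/* what __iommu_unmap_domain_pgtbl() does when the driver didn't */
	gather_add_range(&g, 0x100000, 0x2000);
	printf("empty after add:  %d\n", gather_is_empty(&g));	/* 0 */
	return 0;
}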
+4
drivers/iommu/iommufd/device.c
···16241624 if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))16251625 cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;1626162616271627+ /* Report when ATS cannot be used for this device */16281628+ if (!device_iommu_capable(idev->dev, IOMMU_CAP_PCI_ATS_SUPPORTED))16291629+ cmd->out_capabilities |= IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED;16301630+16271631 cmd->out_max_pasid_log2 = 0;16281632 /*16291633 * Currently, all iommu drivers enable PASID in the probe_device()
···6868 iommu->caps = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAPABILITIES);6969 iommu->fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL);70707171- iommu->irqs_count = platform_irq_count(pdev);7272- if (iommu->irqs_count <= 0)7373- return dev_err_probe(dev, -ENODEV,7474- "no IRQ resources provided\n");7575- if (iommu->irqs_count > RISCV_IOMMU_INTR_COUNT)7676- iommu->irqs_count = RISCV_IOMMU_INTR_COUNT;7171+ iommu->irqs_count = RISCV_IOMMU_INTR_COUNT;77727873 igs = FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps);7974 switch (igs) {···115120 fallthrough;116121117122 case RISCV_IOMMU_CAPABILITIES_IGS_WSI:123123+ ret = platform_irq_count(pdev);124124+ if (ret <= 0)125125+ return dev_err_probe(dev, -ENODEV,126126+ "no IRQ resources provided\n");127127+128128+ iommu->irqs_count = ret;129129+130130+ if (iommu->irqs_count > RISCV_IOMMU_INTR_COUNT)131131+ iommu->irqs_count = RISCV_IOMMU_INTR_COUNT;132132+118133 for (vec = 0; vec < iommu->irqs_count; vec++)119134 iommu->irqs[vec] = platform_get_irq(pdev, vec);120135
+123-263
drivers/iommu/riscv/iommu.c
···2121#include <linux/iopoll.h>2222#include <linux/kernel.h>2323#include <linux/pci.h>2424+#include <linux/generic_pt/iommu.h>24252526#include "../iommu-pages.h"2627#include "iommu-bits.h"···160159 if (FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb)) {161160 const size_t queue_size = entry_size << (logsz + 1);162161163163- queue->phys = pfn_to_phys(FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb));162162+ queue->phys = PFN_PHYS(FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb));164163 queue->base = devm_ioremap(iommu->dev, queue->phys, queue_size);165164 } else {166165 do {···369368 unsigned int timeout_us)370369{371370 unsigned int cons = atomic_read(&queue->head);371371+ unsigned int flags = RISCV_IOMMU_CQCSR_CQMF | RISCV_IOMMU_CQCSR_CMD_TO |372372+ RISCV_IOMMU_CQCSR_CMD_ILL;372373373374 /* Already processed by the consumer */374375 if ((int)(cons - index) > 0)···378375379376 /* Monitor consumer index */380377 return readx_poll_timeout(riscv_iommu_queue_cons, queue, cons,378378+ (riscv_iommu_readl(queue->iommu, queue->qcr) & flags) ||381379 (int)(cons - index) > 0, 0, timeout_us);382380}383381···439435 * 6. Make sure the doorbell write to the device has finished before updating440436 * the shadow tail index in normal memory. 'fence o, w'441437 */438438+#ifdef CONFIG_MMIOWB442439 mmiowb();440440+#endif443441 atomic_inc(&queue->tail);444442445443 /* 7. Complete submission and restore local interrupts */···812806813807/* This struct contains protection domain specific IOMMU driver data. */814808struct riscv_iommu_domain {815815- struct iommu_domain domain;809809+ union {810810+ struct iommu_domain domain;811811+ struct pt_iommu_riscv_64 riscvpt;812812+ };816813 struct list_head bonds;817814 spinlock_t lock; /* protect bonds list updates. */818815 int pscid;819819- bool amo_enabled;820820- int numa_node;821821- unsigned int pgd_mode;822822- unsigned long *pgd_root;823816};817817+PT_IOMMU_CHECK_DOMAIN(struct riscv_iommu_domain, riscvpt.iommu, domain);824818825819#define iommu_domain_to_riscv(iommu_domain) \826820 container_of(iommu_domain, struct riscv_iommu_domain, domain)···934928 struct riscv_iommu_bond *bond;935929 struct riscv_iommu_device *iommu, *prev;936930 struct riscv_iommu_command cmd;937937- unsigned long len = end - start + 1;938938- unsigned long iova;939931940932 /*941933 * For each IOMMU linked with this protection domain (via bonds->dev),···976972977973 riscv_iommu_cmd_inval_vma(&cmd);978974 riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);979979- if (len && len < RISCV_IOMMU_IOTLB_INVAL_LIMIT) {980980- for (iova = start; iova < end; iova += PAGE_SIZE) {975975+ if (end - start < RISCV_IOMMU_IOTLB_INVAL_LIMIT - 1) {976976+ unsigned long iova = start;977977+978978+ do {981979 riscv_iommu_cmd_inval_set_addr(&cmd, iova);982980 riscv_iommu_cmd_send(iommu, &cmd);983983- }981981+ } while (!check_add_overflow(iova, PAGE_SIZE, &iova) &&982982+ iova < end);984983 } else {985984 riscv_iommu_cmd_send(iommu, &cmd);986985 }···1003996}10049971005998#define RISCV_IOMMU_FSC_BARE 0999999+/*10001000+ * This function sends IOTINVAL commands as required by the RISC-V10011001+ * IOMMU specification (Section 6.3.1 and 6.3.2 in 1.0 spec version)10021002+ * after modifying DDT or PDT entries10031003+ */10041004+static void riscv_iommu_iodir_iotinval(struct riscv_iommu_device *iommu,10051005+ bool inval_pdt, unsigned long iohgatp,10061006+ struct riscv_iommu_dc *dc,10071007+ struct riscv_iommu_pc *pc)10081008+{10091009+ struct riscv_iommu_command cmd;1006101010111011+ riscv_iommu_cmd_inval_vma(&cmd);10121012+10131013+ if 
(FIELD_GET(RISCV_IOMMU_DC_IOHGATP_MODE, iohgatp) ==10141014+ RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {10151015+ if (inval_pdt) {10161016+ /*10171017+ * IOTINVAL.VMA with GV=AV=0, and PSCV=1, and10181018+ * PSCID=PC.PSCID10191019+ */10201020+ riscv_iommu_cmd_inval_set_pscid(&cmd,10211021+ FIELD_GET(RISCV_IOMMU_PC_TA_PSCID, pc->ta));10221022+ } else {10231023+ if (!FIELD_GET(RISCV_IOMMU_DC_TC_PDTV, dc->tc) &&10241024+ FIELD_GET(RISCV_IOMMU_DC_FSC_MODE, dc->fsc) !=10251025+ RISCV_IOMMU_DC_FSC_MODE_BARE) {10261026+ /*10271027+ * DC.tc.PDTV == 0 && DC.fsc.MODE != Bare10281028+ * IOTINVAL.VMA with GV=AV=0, and PSCV=1, and10291029+ * PSCID=DC.ta.PSCID10301030+ */10311031+ riscv_iommu_cmd_inval_set_pscid(&cmd,10321032+ FIELD_GET(RISCV_IOMMU_DC_TA_PSCID, dc->ta));10331033+ }10341034+ /* else: IOTINVAL.VMA with GV=AV=PSCV=0 */10351035+ }10361036+ } else {10371037+ riscv_iommu_cmd_inval_set_gscid(&cmd,10381038+ FIELD_GET(RISCV_IOMMU_DC_IOHGATP_GSCID, iohgatp));10391039+10401040+ if (inval_pdt) {10411041+ /*10421042+ * IOTINVAL.VMA with GV=1, AV=0, and PSCV=1, and10431043+ * GSCID=DC.iohgatp.GSCID, PSCID=PC.PSCID10441044+ */10451045+ riscv_iommu_cmd_inval_set_pscid(&cmd,10461046+ FIELD_GET(RISCV_IOMMU_PC_TA_PSCID, pc->ta));10471047+ }10481048+ /*10491049+ * else: IOTINVAL.VMA with GV=1,AV=PSCV=0,and10501050+ * GSCID=DC.iohgatp.GSCID10511051+ *10521052+ * IOTINVAL.GVMA with GV=1,AV=0,and10531053+ * GSCID=DC.iohgatp.GSCID10541054+ * TODO: For now, the Second-Stage feature have not yet been merged,10551055+ * also issue IOTINVAL.GVMA once second-stage support is merged.10561056+ */10571057+ }10581058+ riscv_iommu_cmd_send(iommu, &cmd);10591059+}10071060/*10081061 * Update IODIR for the device.10091062 *···10981031 riscv_iommu_cmd_iodir_inval_ddt(&cmd);10991032 riscv_iommu_cmd_iodir_set_did(&cmd, fwspec->ids[i]);11001033 riscv_iommu_cmd_send(iommu, &cmd);10341034+ /*10351035+ * For now, the SVA and PASID features have not yet been merged, the10361036+ * default configuration is inval_pdt=false and pc=NULL.10371037+ */10381038+ riscv_iommu_iodir_iotinval(iommu, false, dc->iohgatp, dc, NULL);11011039 sync_required = true;11021040 }11031041···11281056 riscv_iommu_cmd_iodir_inval_ddt(&cmd);11291057 riscv_iommu_cmd_iodir_set_did(&cmd, fwspec->ids[i]);11301058 riscv_iommu_cmd_send(iommu, &cmd);10591059+ /*10601060+ * For now, the SVA and PASID features have not yet been merged, the10611061+ * default configuration is inval_pdt=false and pc=NULL.10621062+ */10631063+ riscv_iommu_iodir_iotinval(iommu, false, dc->iohgatp, dc, NULL);11311064 }1132106511331066 riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT);···11541077{11551078 struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);1156107911571157- riscv_iommu_iotlb_inval(domain, gather->start, gather->end);11581158-}11591159-11601160-#define PT_SHIFT (PAGE_SHIFT - ilog2(sizeof(pte_t)))11611161-11621162-#define _io_pte_present(pte) ((pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE))11631163-#define _io_pte_leaf(pte) ((pte) & _PAGE_LEAF)11641164-#define _io_pte_none(pte) ((pte) == 0)11651165-#define _io_pte_entry(pn, prot) ((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot))11661166-11671167-static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,11681168- unsigned long pte,11691169- struct iommu_pages_list *freelist)11701170-{11711171- unsigned long *ptr;11721172- int i;11731173-11741174- if (!_io_pte_present(pte) || _io_pte_leaf(pte))11751175- return;11761176-11771177- ptr = (unsigned long 
*)pfn_to_virt(__page_val_to_pfn(pte));11781178-11791179- /* Recursively free all sub page table pages */11801180- for (i = 0; i < PTRS_PER_PTE; i++) {11811181- pte = READ_ONCE(ptr[i]);11821182- if (!_io_pte_none(pte) && cmpxchg_relaxed(ptr + i, pte, 0) == pte)11831183- riscv_iommu_pte_free(domain, pte, freelist);11841184- }11851185-11861186- if (freelist)11871187- iommu_pages_list_add(freelist, ptr);11881188- else11891189- iommu_free_pages(ptr);11901190-}11911191-11921192-static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,11931193- unsigned long iova, size_t pgsize,11941194- gfp_t gfp)11951195-{11961196- unsigned long *ptr = domain->pgd_root;11971197- unsigned long pte, old;11981198- int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;11991199- void *addr;12001200-12011201- do {12021202- const int shift = PAGE_SHIFT + PT_SHIFT * level;12031203-12041204- ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));12051205- /*12061206- * Note: returned entry might be a non-leaf if there was12071207- * existing mapping with smaller granularity. Up to the caller12081208- * to replace and invalidate.12091209- */12101210- if (((size_t)1 << shift) == pgsize)12111211- return ptr;12121212-pte_retry:12131213- pte = READ_ONCE(*ptr);12141214- /*12151215- * This is very likely incorrect as we should not be adding12161216- * new mapping with smaller granularity on top12171217- * of existing 2M/1G mapping. Fail.12181218- */12191219- if (_io_pte_present(pte) && _io_pte_leaf(pte))12201220- return NULL;12211221- /*12221222- * Non-leaf entry is missing, allocate and try to add to the12231223- * page table. This might race with other mappings, retry.12241224- */12251225- if (_io_pte_none(pte)) {12261226- addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp,12271227- SZ_4K);12281228- if (!addr)12291229- return NULL;12301230- old = pte;12311231- pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE);12321232- if (cmpxchg_relaxed(ptr, old, pte) != old) {12331233- iommu_free_pages(addr);12341234- goto pte_retry;12351235- }12361236- }12371237- ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));12381238- } while (level-- > 0);12391239-12401240- return NULL;12411241-}12421242-12431243-static unsigned long *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain,12441244- unsigned long iova, size_t *pte_pgsize)12451245-{12461246- unsigned long *ptr = domain->pgd_root;12471247- unsigned long pte;12481248- int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;12491249-12501250- do {12511251- const int shift = PAGE_SHIFT + PT_SHIFT * level;12521252-12531253- ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));12541254- pte = READ_ONCE(*ptr);12551255- if (_io_pte_present(pte) && _io_pte_leaf(pte)) {12561256- *pte_pgsize = (size_t)1 << shift;12571257- return ptr;12581258- }12591259- if (_io_pte_none(pte))12601260- return NULL;12611261- ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));12621262- } while (level-- > 0);12631263-12641264- return NULL;12651265-}12661266-12671267-static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,12681268- unsigned long iova, phys_addr_t phys,12691269- size_t pgsize, size_t pgcount, int prot,12701270- gfp_t gfp, size_t *mapped)12711271-{12721272- struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);12731273- size_t size = 0;12741274- unsigned long *ptr;12751275- unsigned long pte, old, pte_prot;12761276- int rc = 0;12771277- struct iommu_pages_list freelist = 
IOMMU_PAGES_LIST_INIT(freelist);12781278-12791279- if (!(prot & IOMMU_WRITE))12801280- pte_prot = _PAGE_BASE | _PAGE_READ;12811281- else if (domain->amo_enabled)12821282- pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE;12831283- else12841284- pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY;12851285-12861286- while (pgcount) {12871287- ptr = riscv_iommu_pte_alloc(domain, iova, pgsize, gfp);12881288- if (!ptr) {12891289- rc = -ENOMEM;12901290- break;12911291- }12921292-12931293- old = READ_ONCE(*ptr);12941294- pte = _io_pte_entry(phys_to_pfn(phys), pte_prot);12951295- if (cmpxchg_relaxed(ptr, old, pte) != old)12961296- continue;12971297-12981298- riscv_iommu_pte_free(domain, old, &freelist);12991299-13001300- size += pgsize;13011301- iova += pgsize;13021302- phys += pgsize;13031303- --pgcount;13041304- }13051305-13061306- *mapped = size;13071307-13081308- if (!iommu_pages_list_empty(&freelist)) {10801080+ if (iommu_pages_list_empty(&gather->freelist)) {10811081+ riscv_iommu_iotlb_inval(domain, gather->start, gather->end);10821082+ } else {13091083 /*13101084 * In 1.0 spec version, the smallest scope we can use to13111085 * invalidate all levels of page table (i.e. leaf and non-leaf)···11651237 * capability.NL (non-leaf) IOTINVAL command.11661238 */11671239 riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);11681168- iommu_put_pages_list(&freelist);12401240+ iommu_put_pages_list(&gather->freelist);11691241 }11701170-11711171- return rc;11721172-}11731173-11741174-static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain,11751175- unsigned long iova, size_t pgsize,11761176- size_t pgcount,11771177- struct iommu_iotlb_gather *gather)11781178-{11791179- struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);11801180- size_t size = pgcount << __ffs(pgsize);11811181- unsigned long *ptr, old;11821182- size_t unmapped = 0;11831183- size_t pte_size;11841184-11851185- while (unmapped < size) {11861186- ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);11871187- if (!ptr)11881188- return unmapped;11891189-11901190- /* partial unmap is not allowed, fail. 
*/11911191- if (iova & (pte_size - 1))11921192- return unmapped;11931193-11941194- old = READ_ONCE(*ptr);11951195- if (cmpxchg_relaxed(ptr, old, 0) != old)11961196- continue;11971197-11981198- iommu_iotlb_gather_add_page(&domain->domain, gather, iova,11991199- pte_size);12001200-12011201- iova += pte_size;12021202- unmapped += pte_size;12031203- }12041204-12051205- return unmapped;12061206-}12071207-12081208-static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain,12091209- dma_addr_t iova)12101210-{12111211- struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);12121212- size_t pte_size;12131213- unsigned long *ptr;12141214-12151215- ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);12161216- if (!ptr)12171217- return 0;12181218-12191219- return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1));12201242}1221124312221244static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain)12231245{12241246 struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);12251225- const unsigned long pfn = virt_to_pfn(domain->pgd_root);1226124712271248 WARN_ON(!list_empty(&domain->bonds));1228124912291250 if ((int)domain->pscid > 0)12301251 ida_free(&riscv_iommu_pscids, domain->pscid);1231125212321232- riscv_iommu_pte_free(domain, _io_pte_entry(pfn, _PAGE_TABLE), NULL);12531253+ pt_iommu_deinit(&domain->riscvpt.iommu);12331254 kfree(domain);12341255}12351256···12041327 struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);12051328 struct riscv_iommu_device *iommu = dev_to_iommu(dev);12061329 struct riscv_iommu_info *info = dev_iommu_priv_get(dev);13301330+ struct pt_iommu_riscv_64_hw_info pt_info;12071331 u64 fsc, ta;1208133212091209- if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode))13331333+ pt_iommu_riscv_64_hw_info(&domain->riscvpt, &pt_info);13341334+13351335+ if (!riscv_iommu_pt_supported(iommu, pt_info.fsc_iosatp_mode))12101336 return -ENODEV;1211133712121212- fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) |12131213- FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root));13381338+ fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, pt_info.fsc_iosatp_mode) |13391339+ FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, pt_info.ppn);12141340 ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) |12151341 RISCV_IOMMU_PC_TA_V;12161342···12281348}1229134912301350static const struct iommu_domain_ops riscv_iommu_paging_domain_ops = {13511351+ IOMMU_PT_DOMAIN_OPS(riscv_64),12311352 .attach_dev = riscv_iommu_attach_paging_domain,12321353 .free = riscv_iommu_free_paging_domain,12331233- .map_pages = riscv_iommu_map_pages,12341234- .unmap_pages = riscv_iommu_unmap_pages,12351235- .iova_to_phys = riscv_iommu_iova_to_phys,12361354 .iotlb_sync = riscv_iommu_iotlb_sync,12371355 .flush_iotlb_all = riscv_iommu_iotlb_flush_all,12381356};1239135712401358static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)12411359{13601360+ struct pt_iommu_riscv_64_cfg cfg = {};12421361 struct riscv_iommu_domain *domain;12431362 struct riscv_iommu_device *iommu;12441244- unsigned int pgd_mode;12451245- dma_addr_t va_mask;12461246- int va_bits;13631363+ int ret;1247136412481365 iommu = dev_to_iommu(dev);12491366 if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57) {12501250- pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57;12511251- va_bits = 57;13671367+ cfg.common.hw_max_vasz_lg2 = 57;12521368 } else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48) {12531253- pgd_mode = 
RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48;12541254- va_bits = 48;13691369+ cfg.common.hw_max_vasz_lg2 = 48;12551370 } else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39) {12561256- pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39;12571257- va_bits = 39;13711371+ cfg.common.hw_max_vasz_lg2 = 39;12581372 } else {12591373 dev_err(dev, "cannot find supported page table mode\n");12601374 return ERR_PTR(-ENODEV);12611375 }13761376+ cfg.common.hw_max_oasz_lg2 = 56;1262137712631378 domain = kzalloc_obj(*domain);12641379 if (!domain)···1261138612621387 INIT_LIST_HEAD_RCU(&domain->bonds);12631388 spin_lock_init(&domain->lock);12641264- domain->numa_node = dev_to_node(iommu->dev);12651265- domain->amo_enabled = !!(iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD);12661266- domain->pgd_mode = pgd_mode;12671267- domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node,12681268- GFP_KERNEL_ACCOUNT, SZ_4K);12691269- if (!domain->pgd_root) {12701270- kfree(domain);12711271- return ERR_PTR(-ENOMEM);12721272- }13891389+ /*13901390+ * 6.4 IOMMU capabilities [..] IOMMU implementations must support the13911391+ * Svnapot standard extension for NAPOT Translation Contiguity.13921392+ */13931393+ cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |13941394+ BIT(PT_FEAT_FLUSH_RANGE) |13951395+ BIT(PT_FEAT_RISCV_SVNAPOT_64K);13961396+ domain->riscvpt.iommu.nid = dev_to_node(iommu->dev);13971397+ domain->domain.ops = &riscv_iommu_paging_domain_ops;1273139812741399 domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1,12751400 RISCV_IOMMU_MAX_PSCID, GFP_KERNEL);12761401 if (domain->pscid < 0) {12771277- iommu_free_pages(domain->pgd_root);12781278- kfree(domain);14021402+ riscv_iommu_free_paging_domain(&domain->domain);12791403 return ERR_PTR(-ENOMEM);12801404 }1281140512821282- /*12831283- * Note: RISC-V Privilege spec mandates that virtual addresses12841284- * need to be sign-extended, so if (VA_BITS - 1) is set, all12851285- * bits >= VA_BITS need to also be set or else we'll get a12861286- * page fault. However the code that creates the mappings12871287- * above us (e.g. iommu_dma_alloc_iova()) won't do that for us12881288- * for now, so we'll end up with invalid virtual addresses12891289- * to map. As a workaround until we get this sorted out12901290- * limit the available virtual addresses to VA_BITS - 1.12911291- */12921292- va_mask = DMA_BIT_MASK(va_bits - 1);12931293-12941294- domain->domain.geometry.aperture_start = 0;12951295- domain->domain.geometry.aperture_end = va_mask;12961296- domain->domain.geometry.force_aperture = true;12971297- domain->domain.pgsize_bitmap = va_mask & (SZ_4K | SZ_2M | SZ_1G | SZ_512G);12981298-12991299- domain->domain.ops = &riscv_iommu_paging_domain_ops;13001300-14061406+ ret = pt_iommu_riscv_64_init(&domain->riscvpt, &cfg, GFP_KERNEL);14071407+ if (ret) {14081408+ riscv_iommu_free_paging_domain(&domain->domain);14091409+ return ERR_PTR(ret);14101410+ }13011411 return &domain->domain;13021412}13031413···13721512 * the device directory. Do not mark the context valid yet.13731513 */13741514 tc = 0;13751375- if (iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD)13761376- tc |= RISCV_IOMMU_DC_TC_SADE;13771515 for (i = 0; i < fwspec->num_ids; i++) {13781516 dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]);13791517 if (!dc) {···15381680 riscv_iommu_queue_disable(&iommu->cmdq);15391681 return rc;15401682}16831683+16841684+MODULE_IMPORT_NS("GENERIC_PT_IOMMU");
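The per-page IOTINVAL loop above became a do/while guarded by check_add_overflow() so that a range ending at the very top of the address space terminates instead of wrapping around. A small userspace demonstration of the same guard, built on the __builtin_add_overflow() primitive that check_add_overflow() wraps:

/*
 * Why the increment is overflow-checked: with a plain
 * "for (iova = start; iova < end; iova += PAGE_SZ)" loop and an end
 * near ULONG_MAX, the increment wraps to 0 and the loop never exits.
 */
#include <stdio.h>
#include <limits.h>

#define PAGE_SZ 4096UL

int main(void)
{
	unsigned long start = ULONG_MAX - 3 * PAGE_SZ + 1; /* last 3 pages */
	unsigned long end = ULONG_MAX;
	unsigned long iova = start;
	unsigned int sent = 0;

	do {
		sent++;		/* stands in for riscv_iommu_cmd_send() */
	} while (!__builtin_add_overflow(iova, PAGE_SZ, &iova) && iova < end);

	/* terminates after 3 commands even though end == ULONG_MAX */
	printf("sent %u per-page invalidations\n", sent);
	return 0;
}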
+16
include/linux/generic_pt/common.h
···175175 PT_FEAT_VTDSS_FORCE_WRITEABLE,176176};177177178178+struct pt_riscv_32 {179179+ struct pt_common common;180180+};181181+182182+struct pt_riscv_64 {183183+ struct pt_common common;184184+};185185+186186+enum {187187+ /*188188+ * Support the 64k contiguous page size following the Svnapot extension.189189+ */190190+ PT_FEAT_RISCV_SVNAPOT_64K = PT_FEAT_FMT_START,191191+192192+};193193+178194struct pt_x86_64 {179195 struct pt_common common;180196};
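As a rough illustration of what PT_FEAT_RISCV_SVNAPOT_64K enables, the sketch below (an illustration of the Svnapot encoding, not the generic_pt implementation) builds a 64 KiB NAPOT leaf PTE: the N bit (bit 63) is set and the low four PPN bits hold 0b1000, and the same PTE value is then written into the 16 consecutive 4 KiB slots the region covers.

/*
 * Illustrative Svnapot 64 KiB leaf PTE encoding (RV64 Sv39/48/57 PTE
 * layout): N = bit 63, PPN starts at bit 10, ppn[3:0] = 0b1000 marks
 * the 64 KiB NAPOT granule.
 */
#include <stdio.h>
#include <stdint.h>

#define PTE_V		(1ull << 0)
#define PTE_R		(1ull << 1)
#define PTE_W		(1ull << 2)
#define PTE_N		(1ull << 63)
#define PTE_PPN_SHIFT	10

static uint64_t napot_64k_pte(uint64_t pa, uint64_t prot)
{
	uint64_t ppn = pa >> 12;	/* pa must be 64 KiB aligned */

	ppn |= 0x8;			/* ppn[3:0] = 0b1000 -> 64 KiB NAPOT */
	return PTE_N | (ppn << PTE_PPN_SHIFT) | prot | PTE_V;
}

int main(void)
{
	/* one value repeated over 16 consecutive 4 KiB slots maps 64 KiB */
	printf("pte = 0x%016llx\n",
	       (unsigned long long)napot_64k_pte(0x80010000, PTE_R | PTE_W));
	return 0;
}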
+69-11
include/linux/generic_pt/iommu.h
···6666 struct device *iommu_device;6767};68686969+static inline struct pt_iommu *iommupt_from_domain(struct iommu_domain *domain)7070+{7171+ if (!IS_ENABLED(CONFIG_IOMMU_PT) || !domain->is_iommupt)7272+ return NULL;7373+ return container_of(domain, struct pt_iommu, domain);7474+}7575+6976/**7077 * struct pt_iommu_info - Details about the IOMMU page table7178 *···8780};88818982struct pt_iommu_ops {8383+ /**8484+ * @map_range: Install translation for an IOVA range8585+ * @iommu_table: Table to manipulate8686+ * @iova: IO virtual address to start8787+ * @paddr: Physical/Output address to start8888+ * @len: Length of the range starting from @iova8989+ * @prot: A bitmap of IOMMU_READ/WRITE/CACHE/NOEXEC/MMIO9090+ * @gfp: GFP flags for any memory allocations9191+ *9292+ * The range starting at IOVA will have paddr installed into it. The9393+ * range is automatically segmented into optimally sized table entries,9494+ * and can have any valid alignment.9595+ *9696+ * On error the caller will probably want to invoke unmap on the range9797+ * from iova up to the amount indicated by @mapped to return the table9898+ * back to an unchanged state.9999+ *100100+ * Context: The caller must hold a write range lock that includes101101+ * the whole range.102102+ *103103+ * Returns: -ERRNO on failure, 0 on success. The number of bytes of VA104104+ * that were mapped are added to @mapped, @mapped is not zeroed first.105105+ */106106+ int (*map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,107107+ phys_addr_t paddr, dma_addr_t len, unsigned int prot,108108+ gfp_t gfp, size_t *mapped);109109+110110+ /**111111+ * @unmap_range: Make a range of IOVA empty/not present112112+ * @iommu_table: Table to manipulate113113+ * @iova: IO virtual address to start114114+ * @len: Length of the range starting from @iova115115+ * @iotlb_gather: Gather struct that must be flushed on return116116+ *117117+ * unmap_range() will remove a translation created by map_range(). It118118+ * cannot subdivide a mapping created by map_range(), so it should be119119+ * called with IOVA ranges that match those passed to map_range(). The120120+ * IOVA range can aggregate contiguous map_range() calls so long as no121121+ * individual range is split.122122+ *123123+ * Context: The caller must hold a write range lock that includes124124+ * the whole range.125125+ *126126+ * Returns: Number of bytes of VA unmapped. 
iova + res will be the127127+ * point unmapping stopped.128128+ */129129+ size_t (*unmap_range)(struct pt_iommu *iommu_table, dma_addr_t iova,130130+ dma_addr_t len,131131+ struct iommu_iotlb_gather *iotlb_gather);132132+90133 /**91134 * @set_dirty: Make the iova write dirty92135 * @iommu_table: Table to manipulate···251194#define IOMMU_PROTOTYPES(fmt) \252195 phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \253196 dma_addr_t iova); \254254- int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain, \255255- unsigned long iova, phys_addr_t paddr, \256256- size_t pgsize, size_t pgcount, \257257- int prot, gfp_t gfp, size_t *mapped); \258258- size_t pt_iommu_##fmt##_unmap_pages( \259259- struct iommu_domain *domain, unsigned long iova, \260260- size_t pgsize, size_t pgcount, \261261- struct iommu_iotlb_gather *iotlb_gather); \262197 int pt_iommu_##fmt##_read_and_clear_dirty( \263198 struct iommu_domain *domain, unsigned long iova, size_t size, \264199 unsigned long flags, struct iommu_dirty_bitmap *dirty); \···271222 * iommu_pt272223 */273224#define IOMMU_PT_DOMAIN_OPS(fmt) \274274- .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \275275- .map_pages = &pt_iommu_##fmt##_map_pages, \276276- .unmap_pages = &pt_iommu_##fmt##_unmap_pages225225+ .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys277226#define IOMMU_PT_DIRTY_OPS(fmt) \278227 .read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty279228···321274};322275323276IOMMU_FORMAT(vtdss, vtdss_pt);277277+278278+struct pt_iommu_riscv_64_cfg {279279+ struct pt_iommu_cfg common;280280+};281281+282282+struct pt_iommu_riscv_64_hw_info {283283+ u64 ppn;284284+ u8 fsc_iosatp_mode;285285+};286286+287287+IOMMU_FORMAT(riscv_64, riscv_64pt);324288325289struct pt_iommu_x86_64_cfg {326290 struct pt_iommu_cfg common;
+5-1
include/linux/iommu.h
···223223struct iommu_domain {224224 unsigned type;225225 enum iommu_domain_cookie_type cookie_type;226226+ bool is_iommupt;226227 const struct iommu_domain_ops *ops;227228 const struct iommu_dirty_ops *dirty_ops;228229 const struct iommu_ops *owner; /* Whose domain_alloc we came from */···272271 */273272 IOMMU_CAP_DEFERRED_FLUSH,274273 IOMMU_CAP_DIRTY_TRACKING, /* IOMMU supports dirty tracking */274274+ /* ATS is supported and may be enabled for this device */275275+ IOMMU_CAP_PCI_ATS_SUPPORTED,275276};276277277278/* These are the possible reserved region types */···983980static inline void iommu_iotlb_sync(struct iommu_domain *domain,984981 struct iommu_iotlb_gather *iotlb_gather)985982{986986- if (domain->ops->iotlb_sync)983983+ if (domain->ops->iotlb_sync &&984984+ likely(iotlb_gather->start < iotlb_gather->end))987985 domain->ops->iotlb_sync(domain, iotlb_gather);988986989987 iommu_iotlb_gather_init(iotlb_gather);
+9
include/uapi/linux/iommufd.h
···695695 * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it696696 * when the struct697697 * iommu_hw_info::out_max_pasid_log2 is zero.698698+ * @IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED: ATS is not supported or cannot be used699699+ * on this device (absence implies ATS700700+ * may be enabled)698701 */699702enum iommufd_hw_capabilities {700703 IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,701704 IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1,702705 IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2,706706+ IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED = 1 << 3,703707};704708705709/**···10561052enum iommu_viommu_type {10571053 IOMMU_VIOMMU_TYPE_DEFAULT = 0,10581054 IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,10551055+ /*10561056+ * TEGRA241_CMDQV requirements (otherwise, VCMDQs will not work)10571057+ * - Kernel will allocate a VINTF (HYP_OWN=0) to back this VIOMMU. So,10581058+ * VMM must wire the HYP_OWN bit to 0 in guest VINTF_CONFIG register10591059+ */10591060 IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2,10601061};10611062
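On the userspace side, a VMM that has issued IOMMU_GET_HW_INFO would treat the absence of IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED in struct iommu_hw_info::out_capabilities as "ATS may be enabled for this device". A minimal sketch of just that bit test (the ioctl call itself is omitted; the constants mirror the enum above):

/*
 * has_usable_ats() checks the capability word returned in
 * struct iommu_hw_info::out_capabilities.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define IOMMU_HW_CAP_DIRTY_TRACKING		(1ull << 0)
#define IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED	(1ull << 3)

static bool has_usable_ats(uint64_t out_capabilities)
{
	return !(out_capabilities & IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED);
}

int main(void)
{
	uint64_t caps = IOMMU_HW_CAP_DIRTY_TRACKING;	/* example value */

	printf("ATS usable: %s\n", has_usable_ats(caps) ? "yes" : "no");
	return 0;
}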