Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

iommu/riscv: Use the generic iommu page table

This is a fairly straightforward conversion of the RISC-V iommu driver to
use the generic iommu page table code.

Invalidation stays as it is now, with the driver pretending to implement
simple range-based invalidation even though the HW implements
single-PTE-based invalidation, which makes it more like ARM SMMUv3 than
AMD. Future work to extend the generic invalidate mechanism to support
more ARM-like semantics would benefit this driver as well.

Delete the existing page table code.

Tested-by: Vincent Chen <vincent.chen@sifive.com>
Acked-by: Paul Walmsley <pjw@kernel.org> # arch/riscv
Reviewed-by: Tomasz Jeznach <tjeznach@rivosinc.com>
Tested-by: Tomasz Jeznach <tjeznach@rivosinc.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

authored by

Jason Gunthorpe and committed by
Joerg Roedel
e5ef3219 e93e4a63

+39 -251
+3
drivers/iommu/riscv/Kconfig
··· 6 6 depends on RISCV && 64BIT 7 7 default y 8 8 select IOMMU_API 9 + select GENERIC_PT 10 + select IOMMU_PT 11 + select IOMMU_PT_RISCV64 9 12 help 10 13 Support for implementations of the RISC-V IOMMU architecture that 11 14 complements the RISC-V MMU capabilities, providing similar address
+36 -251
drivers/iommu/riscv/iommu.c
··· 21 21 #include <linux/iopoll.h> 22 22 #include <linux/kernel.h> 23 23 #include <linux/pci.h> 24 + #include <linux/generic_pt/iommu.h> 24 25 25 26 #include "../iommu-pages.h" 26 27 #include "iommu-bits.h" ··· 807 806 808 807 /* This struct contains protection domain specific IOMMU driver data. */ 809 808 struct riscv_iommu_domain { 810 - struct iommu_domain domain; 809 + union { 810 + struct iommu_domain domain; 811 + struct pt_iommu_riscv_64 riscvpt; 812 + }; 811 813 struct list_head bonds; 812 814 spinlock_t lock; /* protect bonds list updates. */ 813 815 int pscid; 814 - int numa_node; 815 - unsigned int pgd_mode; 816 - unsigned long *pgd_root; 817 816 }; 817 + PT_IOMMU_CHECK_DOMAIN(struct riscv_iommu_domain, riscvpt.iommu, domain); 818 818 819 819 #define iommu_domain_to_riscv(iommu_domain) \ 820 820 container_of(iommu_domain, struct riscv_iommu_domain, domain) ··· 1078 1076 { 1079 1077 struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); 1080 1078 1081 - riscv_iommu_iotlb_inval(domain, gather->start, gather->end); 1082 - } 1083 - 1084 - #define PT_SHIFT (PAGE_SHIFT - ilog2(sizeof(pte_t))) 1085 - 1086 - #define _io_pte_present(pte) ((pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)) 1087 - #define _io_pte_leaf(pte) ((pte) & _PAGE_LEAF) 1088 - #define _io_pte_none(pte) ((pte) == 0) 1089 - #define _io_pte_entry(pn, prot) ((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot)) 1090 - 1091 - static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain, 1092 - unsigned long pte, 1093 - struct iommu_pages_list *freelist) 1094 - { 1095 - unsigned long *ptr; 1096 - int i; 1097 - 1098 - if (!_io_pte_present(pte) || _io_pte_leaf(pte)) 1099 - return; 1100 - 1101 - ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte)); 1102 - 1103 - /* Recursively free all sub page table pages */ 1104 - for (i = 0; i < PTRS_PER_PTE; i++) { 1105 - pte = READ_ONCE(ptr[i]); 1106 - if (!_io_pte_none(pte) && cmpxchg_relaxed(ptr + i, pte, 0) == pte) 1107 - 
riscv_iommu_pte_free(domain, pte, freelist); 1108 - } 1109 - 1110 - if (freelist) 1111 - iommu_pages_list_add(freelist, ptr); 1112 - else 1113 - iommu_free_pages(ptr); 1114 - } 1115 - 1116 - static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain, 1117 - unsigned long iova, size_t pgsize, 1118 - gfp_t gfp) 1119 - { 1120 - unsigned long *ptr = domain->pgd_root; 1121 - unsigned long pte, old; 1122 - int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2; 1123 - void *addr; 1124 - 1125 - do { 1126 - const int shift = PAGE_SHIFT + PT_SHIFT * level; 1127 - 1128 - ptr += ((iova >> shift) & (PTRS_PER_PTE - 1)); 1129 - /* 1130 - * Note: returned entry might be a non-leaf if there was 1131 - * existing mapping with smaller granularity. Up to the caller 1132 - * to replace and invalidate. 1133 - */ 1134 - if (((size_t)1 << shift) == pgsize) 1135 - return ptr; 1136 - pte_retry: 1137 - pte = READ_ONCE(*ptr); 1138 - /* 1139 - * This is very likely incorrect as we should not be adding 1140 - * new mapping with smaller granularity on top 1141 - * of existing 2M/1G mapping. Fail. 1142 - */ 1143 - if (_io_pte_present(pte) && _io_pte_leaf(pte)) 1144 - return NULL; 1145 - /* 1146 - * Non-leaf entry is missing, allocate and try to add to the 1147 - * page table. This might race with other mappings, retry. 
1148 - */ 1149 - if (_io_pte_none(pte)) { 1150 - addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp, 1151 - SZ_4K); 1152 - if (!addr) 1153 - return NULL; 1154 - old = pte; 1155 - pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE); 1156 - if (cmpxchg_relaxed(ptr, old, pte) != old) { 1157 - iommu_free_pages(addr); 1158 - goto pte_retry; 1159 - } 1160 - } 1161 - ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte)); 1162 - } while (level-- > 0); 1163 - 1164 - return NULL; 1165 - } 1166 - 1167 - static unsigned long *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain, 1168 - unsigned long iova, size_t *pte_pgsize) 1169 - { 1170 - unsigned long *ptr = domain->pgd_root; 1171 - unsigned long pte; 1172 - int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2; 1173 - 1174 - do { 1175 - const int shift = PAGE_SHIFT + PT_SHIFT * level; 1176 - 1177 - ptr += ((iova >> shift) & (PTRS_PER_PTE - 1)); 1178 - pte = READ_ONCE(*ptr); 1179 - if (_io_pte_present(pte) && _io_pte_leaf(pte)) { 1180 - *pte_pgsize = (size_t)1 << shift; 1181 - return ptr; 1182 - } 1183 - if (_io_pte_none(pte)) 1184 - return NULL; 1185 - ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte)); 1186 - } while (level-- > 0); 1187 - 1188 - return NULL; 1189 - } 1190 - 1191 - static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain, 1192 - unsigned long iova, phys_addr_t phys, 1193 - size_t pgsize, size_t pgcount, int prot, 1194 - gfp_t gfp, size_t *mapped) 1195 - { 1196 - struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); 1197 - size_t size = 0; 1198 - unsigned long *ptr; 1199 - unsigned long pte, old, pte_prot; 1200 - int rc = 0; 1201 - struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist); 1202 - 1203 - if (!(prot & IOMMU_WRITE)) 1204 - pte_prot = _PAGE_BASE | _PAGE_READ; 1205 - else 1206 - pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY; 1207 - 1208 - while (pgcount) { 1209 - ptr = riscv_iommu_pte_alloc(domain, iova, 
pgsize, gfp); 1210 - if (!ptr) { 1211 - rc = -ENOMEM; 1212 - break; 1213 - } 1214 - 1215 - old = READ_ONCE(*ptr); 1216 - pte = _io_pte_entry(phys_to_pfn(phys), pte_prot); 1217 - if (cmpxchg_relaxed(ptr, old, pte) != old) 1218 - continue; 1219 - 1220 - riscv_iommu_pte_free(domain, old, &freelist); 1221 - 1222 - size += pgsize; 1223 - iova += pgsize; 1224 - phys += pgsize; 1225 - --pgcount; 1226 - } 1227 - 1228 - *mapped = size; 1229 - 1230 - if (!iommu_pages_list_empty(&freelist)) { 1079 + if (iommu_pages_list_empty(&gather->freelist)) { 1080 + riscv_iommu_iotlb_inval(domain, gather->start, gather->end); 1081 + } else { 1231 1082 /* 1232 1083 * In 1.0 spec version, the smallest scope we can use to 1233 1084 * invalidate all levels of page table (i.e. leaf and non-leaf) ··· 1089 1234 * capability.NL (non-leaf) IOTINVAL command. 1090 1235 */ 1091 1236 riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX); 1092 - iommu_put_pages_list(&freelist); 1237 + iommu_put_pages_list(&gather->freelist); 1093 1238 } 1094 - 1095 - return rc; 1096 - } 1097 - 1098 - static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain, 1099 - unsigned long iova, size_t pgsize, 1100 - size_t pgcount, 1101 - struct iommu_iotlb_gather *gather) 1102 - { 1103 - struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); 1104 - size_t size = pgcount << __ffs(pgsize); 1105 - unsigned long *ptr, old; 1106 - size_t unmapped = 0; 1107 - size_t pte_size; 1108 - 1109 - while (unmapped < size) { 1110 - ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size); 1111 - if (!ptr) 1112 - return unmapped; 1113 - 1114 - /* partial unmap is not allowed, fail. 
*/ 1115 - if (iova & (pte_size - 1)) 1116 - return unmapped; 1117 - 1118 - old = READ_ONCE(*ptr); 1119 - if (cmpxchg_relaxed(ptr, old, 0) != old) 1120 - continue; 1121 - 1122 - iommu_iotlb_gather_add_page(&domain->domain, gather, iova, 1123 - pte_size); 1124 - 1125 - iova += pte_size; 1126 - unmapped += pte_size; 1127 - } 1128 - 1129 - return unmapped; 1130 - } 1131 - 1132 - static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain, 1133 - dma_addr_t iova) 1134 - { 1135 - struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); 1136 - size_t pte_size; 1137 - unsigned long *ptr; 1138 - 1139 - ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size); 1140 - if (!ptr) 1141 - return 0; 1142 - 1143 - return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1)); 1144 1239 } 1145 1240 1146 1241 static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain) 1147 1242 { 1148 1243 struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); 1149 - const unsigned long pfn = virt_to_pfn(domain->pgd_root); 1150 1244 1151 1245 WARN_ON(!list_empty(&domain->bonds)); 1152 1246 1153 1247 if ((int)domain->pscid > 0) 1154 1248 ida_free(&riscv_iommu_pscids, domain->pscid); 1155 1249 1156 - riscv_iommu_pte_free(domain, _io_pte_entry(pfn, _PAGE_TABLE), NULL); 1250 + pt_iommu_deinit(&domain->riscvpt.iommu); 1157 1251 kfree(domain); 1158 1252 } 1159 1253 ··· 1128 1324 struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); 1129 1325 struct riscv_iommu_device *iommu = dev_to_iommu(dev); 1130 1326 struct riscv_iommu_info *info = dev_iommu_priv_get(dev); 1327 + struct pt_iommu_riscv_64_hw_info pt_info; 1131 1328 u64 fsc, ta; 1132 1329 1133 - if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode)) 1330 + pt_iommu_riscv_64_hw_info(&domain->riscvpt, &pt_info); 1331 + 1332 + if (!riscv_iommu_pt_supported(iommu, pt_info.fsc_iosatp_mode)) 1134 1333 return -ENODEV; 1135 1334 1136 - fsc = 
FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) | 1137 - FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root)); 1335 + fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, pt_info.fsc_iosatp_mode) | 1336 + FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, pt_info.ppn); 1138 1337 ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) | 1139 1338 RISCV_IOMMU_PC_TA_V; 1140 1339 ··· 1152 1345 } 1153 1346 1154 1347 static const struct iommu_domain_ops riscv_iommu_paging_domain_ops = { 1348 + IOMMU_PT_DOMAIN_OPS(riscv_64), 1155 1349 .attach_dev = riscv_iommu_attach_paging_domain, 1156 1350 .free = riscv_iommu_free_paging_domain, 1157 - .map_pages = riscv_iommu_map_pages, 1158 - .unmap_pages = riscv_iommu_unmap_pages, 1159 - .iova_to_phys = riscv_iommu_iova_to_phys, 1160 1351 .iotlb_sync = riscv_iommu_iotlb_sync, 1161 1352 .flush_iotlb_all = riscv_iommu_iotlb_flush_all, 1162 1353 }; 1163 1354 1164 1355 static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev) 1165 1356 { 1357 + struct pt_iommu_riscv_64_cfg cfg = {}; 1166 1358 struct riscv_iommu_domain *domain; 1167 1359 struct riscv_iommu_device *iommu; 1168 - unsigned int pgd_mode; 1169 - dma_addr_t va_mask; 1170 - int va_bits; 1360 + int ret; 1171 1361 1172 1362 iommu = dev_to_iommu(dev); 1173 1363 if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57) { 1174 - pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57; 1175 - va_bits = 57; 1364 + cfg.common.hw_max_vasz_lg2 = 57; 1176 1365 } else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48) { 1177 - pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48; 1178 - va_bits = 48; 1366 + cfg.common.hw_max_vasz_lg2 = 48; 1179 1367 } else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39) { 1180 - pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39; 1181 - va_bits = 39; 1368 + cfg.common.hw_max_vasz_lg2 = 39; 1182 1369 } else { 1183 1370 dev_err(dev, "cannot find supported page table mode\n"); 1184 1371 return ERR_PTR(-ENODEV); 1185 1372 } 1373 + cfg.common.hw_max_oasz_lg2 = 56; 1186 1374 
1187 1375 domain = kzalloc_obj(*domain); 1188 1376 if (!domain) ··· 1185 1383 1186 1384 INIT_LIST_HEAD_RCU(&domain->bonds); 1187 1385 spin_lock_init(&domain->lock); 1188 - domain->numa_node = dev_to_node(iommu->dev); 1189 - domain->pgd_mode = pgd_mode; 1190 - domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node, 1191 - GFP_KERNEL_ACCOUNT, SZ_4K); 1192 - if (!domain->pgd_root) { 1193 - kfree(domain); 1194 - return ERR_PTR(-ENOMEM); 1195 - } 1386 + cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) | 1387 + BIT(PT_FEAT_FLUSH_RANGE); 1388 + domain->riscvpt.iommu.nid = dev_to_node(iommu->dev); 1389 + domain->domain.ops = &riscv_iommu_paging_domain_ops; 1196 1390 1197 1391 domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1, 1198 1392 RISCV_IOMMU_MAX_PSCID, GFP_KERNEL); 1199 1393 if (domain->pscid < 0) { 1200 - iommu_free_pages(domain->pgd_root); 1201 - kfree(domain); 1394 + riscv_iommu_free_paging_domain(&domain->domain); 1202 1395 return ERR_PTR(-ENOMEM); 1203 1396 } 1204 1397 1205 - /* 1206 - * Note: RISC-V Privilege spec mandates that virtual addresses 1207 - * need to be sign-extended, so if (VA_BITS - 1) is set, all 1208 - * bits >= VA_BITS need to also be set or else we'll get a 1209 - * page fault. However the code that creates the mappings 1210 - * above us (e.g. iommu_dma_alloc_iova()) won't do that for us 1211 - * for now, so we'll end up with invalid virtual addresses 1212 - * to map. As a workaround until we get this sorted out 1213 - * limit the available virtual addresses to VA_BITS - 1. 
1214 - */ 1215 - va_mask = DMA_BIT_MASK(va_bits - 1); 1216 - 1217 - domain->domain.geometry.aperture_start = 0; 1218 - domain->domain.geometry.aperture_end = va_mask; 1219 - domain->domain.geometry.force_aperture = true; 1220 - domain->domain.pgsize_bitmap = va_mask & (SZ_4K | SZ_2M | SZ_1G | SZ_512G); 1221 - 1222 - domain->domain.ops = &riscv_iommu_paging_domain_ops; 1223 - 1398 + ret = pt_iommu_riscv_64_init(&domain->riscvpt, &cfg, GFP_KERNEL); 1399 + if (ret) { 1400 + riscv_iommu_free_paging_domain(&domain->domain); 1401 + return ERR_PTR(ret); 1402 + } 1224 1403 return &domain->domain; 1225 1404 } 1226 1405 ··· 1457 1674 riscv_iommu_queue_disable(&iommu->cmdq); 1458 1675 return rc; 1459 1676 } 1677 + 1678 + MODULE_IMPORT_NS("GENERIC_PT_IOMMU");