Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

iommupt: Add unmap_pages op

unmap_pages removes mappings and any fully contained interior tables from
the given range. This follows the now-standard iommu_domain API definition
where it does not split up larger page sizes into smaller ones. The caller
must perform unmap only on ranges created by map, or it must have otherwise
determined safe cut points (e.g. iommufd/vfio use iova_to_phys to scan for
them).

Future work will provide 'cut', which explicitly performs the page size
split if the HW can support it.

unmap is implemented with a recursive descent of the tree. If the caller
provides a VA range that spans an entire table item then the table memory
can be freed as well.

If an entire table item can be freed then this version will also check the
leaf-only level of the tree to ensure that all entries are present,
generating -EINVAL if any is missing. Many of the existing drivers don't do
this extra check.

This version sits under the iommu_domain_ops as unmap_pages() but does not
require the external page size calculation. The implementation is actually
unmap_range() and can do arbitrary ranges, internally handling all the
validation and supporting any arrangement of page sizes. A future series
can optimize __iommu_unmap() to take advantage of this.

Freed page table memory is batched up in the gather and will be freed in
the driver's iotlb_sync() callback after the IOTLB flush completes.

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Tested-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

authored by

Jason Gunthorpe and committed by
Joerg Roedel
7c53f423 9d4c274c

+164 -2
+156
drivers/iommu/generic_pt/iommu_pt.h
··· 14 14 #include <linux/export.h> 15 15 #include <linux/iommu.h> 16 16 #include "../iommu-pages.h" 17 + #include <linux/cleanup.h> 18 + #include <linux/dma-mapping.h> 19 + 20 + static void gather_range_pages(struct iommu_iotlb_gather *iotlb_gather, 21 + struct pt_iommu *iommu_table, pt_vaddr_t iova, 22 + pt_vaddr_t len, 23 + struct iommu_pages_list *free_list) 24 + { 25 + struct pt_common *common = common_from_iommu(iommu_table); 26 + 27 + if (pt_feature(common, PT_FEAT_FLUSH_RANGE_NO_GAPS) && 28 + iommu_iotlb_gather_is_disjoint(iotlb_gather, iova, len)) { 29 + iommu_iotlb_sync(&iommu_table->domain, iotlb_gather); 30 + /* 31 + * Note that the sync frees the gather's free list, so we must 32 + * not have any pages on that list that are covered by iova/len 33 + */ 34 + } else if (pt_feature(common, PT_FEAT_FLUSH_RANGE)) { 35 + iommu_iotlb_gather_add_range(iotlb_gather, iova, len); 36 + } 37 + 38 + iommu_pages_list_splice(free_list, &iotlb_gather->freelist); 39 + } 17 40 18 41 #define DOMAIN_NS(op) CONCATENATE(CONCATENATE(pt_iommu_, PTPFX), op) 19 42 ··· 186 163 iommu_table->nid, gfp, 187 164 log2_to_int(pt_top_memsize_lg2(common, top_of_table))); 188 165 } 166 + 167 + struct pt_unmap_args { 168 + struct iommu_pages_list free_list; 169 + pt_vaddr_t unmapped; 170 + }; 171 + 172 + static __maybe_unused int __unmap_range(struct pt_range *range, void *arg, 173 + unsigned int level, 174 + struct pt_table_p *table) 175 + { 176 + struct pt_state pts = pt_init(range, level, table); 177 + struct pt_unmap_args *unmap = arg; 178 + unsigned int num_oas = 0; 179 + unsigned int start_index; 180 + int ret = 0; 181 + 182 + _pt_iter_first(&pts); 183 + start_index = pts.index; 184 + pts.type = pt_load_entry_raw(&pts); 185 + /* 186 + * A starting index is in the middle of a contiguous entry 187 + * 188 + * The IOMMU API does not require drivers to support unmapping parts of 189 + * large pages. Long ago VFIO would try to split maps but the current 190 + * version never does. 
191 + * 192 + * Instead when unmap reaches a partial unmap of the start of a large 193 + * IOPTE it should remove the entire IOPTE and return that size to the 194 + * caller. 195 + */ 196 + if (pts.type == PT_ENTRY_OA) { 197 + if (log2_mod(range->va, pt_entry_oa_lg2sz(&pts))) 198 + return -EINVAL; 199 + /* Micro optimization */ 200 + goto start_oa; 201 + } 202 + 203 + do { 204 + if (pts.type != PT_ENTRY_OA) { 205 + bool fully_covered; 206 + 207 + if (pts.type != PT_ENTRY_TABLE) { 208 + ret = -EINVAL; 209 + break; 210 + } 211 + 212 + if (pts.index != start_index) 213 + pt_index_to_va(&pts); 214 + pts.table_lower = pt_table_ptr(&pts); 215 + 216 + fully_covered = pt_entry_fully_covered( 217 + &pts, pt_table_item_lg2sz(&pts)); 218 + 219 + ret = pt_descend(&pts, arg, __unmap_range); 220 + if (ret) 221 + break; 222 + 223 + /* 224 + * If the unmapping range fully covers the table then we 225 + * can free it as well. The clear is delayed until we 226 + * succeed in clearing the lower table levels. 227 + */ 228 + if (fully_covered) { 229 + iommu_pages_list_add(&unmap->free_list, 230 + pts.table_lower); 231 + pt_clear_entries(&pts, ilog2(1)); 232 + } 233 + pts.index++; 234 + } else { 235 + unsigned int num_contig_lg2; 236 + start_oa: 237 + /* 238 + * If the caller requested an last that falls within a 239 + * single entry then the entire entry is unmapped and 240 + * the length returned will be larger than requested. 
241 + */ 242 + num_contig_lg2 = pt_entry_num_contig_lg2(&pts); 243 + pt_clear_entries(&pts, num_contig_lg2); 244 + num_oas += log2_to_int(num_contig_lg2); 245 + pts.index += log2_to_int(num_contig_lg2); 246 + } 247 + if (pts.index >= pts.end_index) 248 + break; 249 + pts.type = pt_load_entry_raw(&pts); 250 + } while (true); 251 + 252 + unmap->unmapped += log2_mul(num_oas, pt_table_item_lg2sz(&pts)); 253 + return ret; 254 + } 255 + 256 + /** 257 + * unmap_pages() - Make a range of IOVA empty/not present 258 + * @domain: Domain to manipulate 259 + * @iova: IO virtual address to start 260 + * @pgsize: Length of each page 261 + * @pgcount: Length of the range in pgsize units starting from @iova 262 + * @iotlb_gather: Gather struct that must be flushed on return 263 + * 264 + * unmap_pages() will remove a translation created by map_pages(). It cannot 265 + * subdivide a mapping created by map_pages(), so it should be called with IOVA 266 + * ranges that match those passed to map_pages(). The IOVA range can aggregate 267 + * contiguous map_pages() calls so long as no individual range is split. 268 + * 269 + * Context: The caller must hold a write range lock that includes 270 + * the whole range. 271 + * 272 + * Returns: Number of bytes of VA unmapped. iova + res will be the point 273 + * unmapping stopped. 
274 + */ 275 + size_t DOMAIN_NS(unmap_pages)(struct iommu_domain *domain, unsigned long iova, 276 + size_t pgsize, size_t pgcount, 277 + struct iommu_iotlb_gather *iotlb_gather) 278 + { 279 + struct pt_iommu *iommu_table = 280 + container_of(domain, struct pt_iommu, domain); 281 + struct pt_unmap_args unmap = { .free_list = IOMMU_PAGES_LIST_INIT( 282 + unmap.free_list) }; 283 + pt_vaddr_t len = pgsize * pgcount; 284 + struct pt_range range; 285 + int ret; 286 + 287 + ret = make_range(common_from_iommu(iommu_table), &range, iova, len); 288 + if (ret) 289 + return 0; 290 + 291 + pt_walk_range(&range, __unmap_range, &unmap); 292 + 293 + gather_range_pages(iotlb_gather, iommu_table, iova, len, 294 + &unmap.free_list); 295 + 296 + return unmap.unmapped; 297 + } 298 + EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(unmap_pages), "GENERIC_PT_IOMMU"); 189 299 190 300 static void NS(get_info)(struct pt_iommu *iommu_table, 191 301 struct pt_iommu_info *info)
+8 -2
include/linux/generic_pt/iommu.h
··· 9 9 #include <linux/iommu.h> 10 10 #include <linux/mm_types.h> 11 11 12 + struct iommu_iotlb_gather; 12 13 struct pt_iommu_ops; 13 14 14 15 /** ··· 120 119 #define IOMMU_PROTOTYPES(fmt) \ 121 120 phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \ 122 121 dma_addr_t iova); \ 122 + size_t pt_iommu_##fmt##_unmap_pages( \ 123 + struct iommu_domain *domain, unsigned long iova, \ 124 + size_t pgsize, size_t pgcount, \ 125 + struct iommu_iotlb_gather *iotlb_gather); \ 123 126 int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table, \ 124 127 const struct pt_iommu_##fmt##_cfg *cfg, \ 125 128 gfp_t gfp); \ ··· 140 135 * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the 141 136 * iommu_pt 142 137 */ 143 - #define IOMMU_PT_DOMAIN_OPS(fmt) \ 144 - .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, 138 + #define IOMMU_PT_DOMAIN_OPS(fmt) \ 139 + .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \ 140 + .unmap_pages = &pt_iommu_##fmt##_unmap_pages 145 141 146 142 /* 147 143 * The driver should setup its domain struct like