Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

iommupt: Add read_and_clear_dirty op

IOMMU HW now supports updating a dirty bit in an entry when a DMA writes
to the entry's VA range. iommufd has a uAPI to read and clear the dirty
bits from the tables.

This is a trivial recursive descent algorithm to read and optionally clear
the dirty bits. The format needs a function to tell if a contiguous entry
is dirty, and a function to clear a contiguous entry back to clean.

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Tested-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

authored by

Jason Gunthorpe and committed by
Joerg Roedel
4a00f943 dcd6a011

+110
+104
drivers/iommu/generic_pt/iommu_pt.h
··· 162 162 } 163 163 EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(iova_to_phys), "GENERIC_PT_IOMMU"); 164 164 165 + struct pt_iommu_dirty_args { 166 + struct iommu_dirty_bitmap *dirty; 167 + unsigned int flags; 168 + }; 169 + 170 + static void record_dirty(struct pt_state *pts, 171 + struct pt_iommu_dirty_args *dirty, 172 + unsigned int num_contig_lg2) 173 + { 174 + pt_vaddr_t dirty_len; 175 + 176 + if (num_contig_lg2 != ilog2(1)) { 177 + unsigned int index = pts->index; 178 + unsigned int end_index = log2_set_mod_max_t( 179 + unsigned int, pts->index, num_contig_lg2); 180 + 181 + /* Adjust for being contained inside a contiguous page */ 182 + end_index = min(end_index, pts->end_index); 183 + dirty_len = (end_index - index) * 184 + log2_to_int(pt_table_item_lg2sz(pts)); 185 + } else { 186 + dirty_len = log2_to_int(pt_table_item_lg2sz(pts)); 187 + } 188 + 189 + if (dirty->dirty->bitmap) 190 + iova_bitmap_set(dirty->dirty->bitmap, pts->range->va, 191 + dirty_len); 192 + 193 + if (!(dirty->flags & IOMMU_DIRTY_NO_CLEAR)) { 194 + pt_entry_make_write_clean(pts); 195 + iommu_iotlb_gather_add_range(dirty->dirty->gather, 196 + pts->range->va, dirty_len); 197 + } 198 + } 199 + 200 + static inline int __read_and_clear_dirty(struct pt_range *range, void *arg, 201 + unsigned int level, 202 + struct pt_table_p *table) 203 + { 204 + struct pt_state pts = pt_init(range, level, table); 205 + struct pt_iommu_dirty_args *dirty = arg; 206 + int ret; 207 + 208 + for_each_pt_level_entry(&pts) { 209 + if (pts.type == PT_ENTRY_TABLE) { 210 + ret = pt_descend(&pts, arg, __read_and_clear_dirty); 211 + if (ret) 212 + return ret; 213 + continue; 214 + } 215 + if (pts.type == PT_ENTRY_OA && pt_entry_is_write_dirty(&pts)) 216 + record_dirty(&pts, dirty, 217 + pt_entry_num_contig_lg2(&pts)); 218 + } 219 + return 0; 220 + } 221 + 222 + /** 223 + * read_and_clear_dirty() - Manipulate the HW set write dirty state 224 + * @domain: Domain to manipulate 225 + * @iova: IO virtual address to start 226 + * @size: 
Length of the IOVA 227 + * @flags: A bitmap of IOMMU_DIRTY_NO_CLEAR 228 + * @dirty: Place to store the dirty bits 229 + * 230 + * Iterate over all the entries in the mapped range and record their write dirty 231 + * status in iommu_dirty_bitmap. If IOMMU_DIRTY_NO_CLEAR is specified then 232 + * the entries will be left dirty, otherwise they are returned to being not 233 + * write dirty. 234 + * 235 + * Context: The caller must hold a read range lock that includes @iova. 236 + * 237 + * Returns: -ERRNO on failure, 0 on success. 238 + */ 239 + int DOMAIN_NS(read_and_clear_dirty)(struct iommu_domain *domain, 240 + unsigned long iova, size_t size, 241 + unsigned long flags, 242 + struct iommu_dirty_bitmap *dirty) 243 + { 244 + struct pt_iommu *iommu_table = 245 + container_of(domain, struct pt_iommu, domain); 246 + struct pt_iommu_dirty_args dirty_args = { 247 + .dirty = dirty, 248 + .flags = flags, 249 + }; 250 + struct pt_range range; 251 + int ret; 252 + 253 + #if !IS_ENABLED(CONFIG_IOMMUFD_DRIVER) || !defined(pt_entry_is_write_dirty) 254 + return -EOPNOTSUPP; 255 + #endif 256 + 257 + ret = make_range(common_from_iommu(iommu_table), &range, iova, size); 258 + if (ret) 259 + return ret; 260 + 261 + ret = pt_walk_range(&range, __read_and_clear_dirty, &dirty_args); 262 + PT_WARN_ON(ret); 263 + return ret; 264 + } 265 + EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(read_and_clear_dirty), "GENERIC_PT_IOMMU"); 266 + 165 267 struct pt_iommu_collect_args { 166 268 struct iommu_pages_list free_list; 167 269 /* Fail if any OAs are within the range */ ··· 1117 1015 MODULE_LICENSE("GPL"); 1118 1016 MODULE_DESCRIPTION("IOMMU Page table implementation for " __stringify(PTPFX_RAW)); 1119 1017 MODULE_IMPORT_NS("GENERIC_PT"); 1018 + /* For iommu_dirty_bitmap_record() */ 1019 + MODULE_IMPORT_NS("IOMMUFD"); 1120 1020 1121 1021 #endif /* __GENERIC_PT_IOMMU_PT_H */
+6
include/linux/generic_pt/iommu.h
··· 12 12 struct iommu_iotlb_gather; 13 13 struct pt_iommu_ops; 14 14 struct pt_iommu_driver_ops; 15 + struct iommu_dirty_bitmap; 15 16 16 17 /** 17 18 * DOC: IOMMU Radix Page Table ··· 183 182 struct iommu_domain *domain, unsigned long iova, \ 184 183 size_t pgsize, size_t pgcount, \ 185 184 struct iommu_iotlb_gather *iotlb_gather); \ 185 + int pt_iommu_##fmt##_read_and_clear_dirty( \ 186 + struct iommu_domain *domain, unsigned long iova, size_t size, \ 187 + unsigned long flags, struct iommu_dirty_bitmap *dirty); \ 186 188 int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table, \ 187 189 const struct pt_iommu_##fmt##_cfg *cfg, \ 188 190 gfp_t gfp); \ ··· 206 202 .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \ 207 203 .map_pages = &pt_iommu_##fmt##_map_pages, \ 208 204 .unmap_pages = &pt_iommu_##fmt##_unmap_pages 205 + #define IOMMU_PT_DIRTY_OPS(fmt) \ 206 + .read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty 209 207 210 208 /* 211 209 * The driver should setup its domain struct like