/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 */
#ifndef _UAPI_IOMMUFD_H
#define _UAPI_IOMMUFD_H

#include <linux/ioctl.h>
#include <linux/types.h>

#define IOMMUFD_TYPE (';')

/**
 * DOC: General ioctl format
 *
 * The ioctl interface follows a general format to allow for extensibility. Each
 * ioctl is passed in a structure pointer as the argument providing the size of
 * the structure in the first u32. The kernel checks that any structure space
 * beyond what it understands is 0. This allows userspace to use the backward
 * compatible portion while consistently using the newer, larger, structures.
 *
 * ioctls use a standard meaning for common errnos:
 *
 * - ENOTTY: The IOCTL number itself is not supported at all
 * - E2BIG: The IOCTL number is supported, but the provided structure has a
 *   non-zero value in a part the kernel does not understand.
 * - EOPNOTSUPP: The IOCTL number is supported, and the structure is
 *   understood, however a known field has a value the kernel does not
 *   understand or support.
 * - EINVAL: Everything about the IOCTL was understood, but a field is not
 *   correct.
 * - ENOENT: An ID or IOVA provided does not exist.
 * - ENOMEM: Out of memory.
 * - EOVERFLOW: Mathematics overflowed.
 *
 * Individual ioctls may additionally return ioctl-specific errnos.
 */
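
/*
 * Illustrative sketch of the calling convention (not part of the UAPI; the
 * iommufd fd here is assumed to come from opening /dev/iommu). Every command
 * structure is zero-initialized with its size in the first u32::
 *
 *	struct iommu_ioas_alloc cmd = { .size = sizeof(cmd) };
 *	int rc = ioctl(iommufd, IOMMU_IOAS_ALLOC, &cmd);
 *
 * rc is 0 on success; on failure errno follows the list above, e.g. E2BIG
 * when the running kernel is older than this structure layout.
 */
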
enum {
	IOMMUFD_CMD_BASE = 0x80,
	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
	IOMMUFD_CMD_IOAS_ALLOC = 0x81,
	IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
	IOMMUFD_CMD_IOAS_COPY = 0x83,
	IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
	IOMMUFD_CMD_IOAS_MAP = 0x85,
	IOMMUFD_CMD_IOAS_UNMAP = 0x86,
	IOMMUFD_CMD_OPTION = 0x87,
	IOMMUFD_CMD_VFIO_IOAS = 0x88,
	IOMMUFD_CMD_HWPT_ALLOC = 0x89,
	IOMMUFD_CMD_GET_HW_INFO = 0x8a,
	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
	IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
	IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
	IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
	IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
	IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93,
	IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94,
};

/**
 * struct iommu_destroy - ioctl(IOMMU_DESTROY)
 * @size: sizeof(struct iommu_destroy)
 * @id: iommufd object ID to destroy. Can be any destroyable object type.
 *
 * Destroy any object held within iommufd.
 */
struct iommu_destroy {
	__u32 size;
	__u32 id;
};
#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)

/**
 * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
 * @size: sizeof(struct iommu_ioas_alloc)
 * @flags: Must be 0
 * @out_ioas_id: Output IOAS ID for the allocated object
 *
 * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
 * to memory mapping.
 */
struct iommu_ioas_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_ioas_id;
};
#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)

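/*
 * Usage sketch for the two ioctls above, assuming an open iommufd; error
 * handling is elided::
 *
 *	struct iommu_ioas_alloc alloc = { .size = sizeof(alloc) };
 *	struct iommu_destroy destroy = { .size = sizeof(destroy) };
 *
 *	ioctl(iommufd, IOMMU_IOAS_ALLOC, &alloc);
 *	destroy.id = alloc.out_ioas_id;
 *	ioctl(iommufd, IOMMU_DESTROY, &destroy);
 */
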
/**
 * struct iommu_iova_range - ioctl(IOMMU_IOVA_RANGE)
 * @start: First IOVA
 * @last: Inclusive last IOVA
 *
 * An interval in IOVA space.
 */
struct iommu_iova_range {
	__aligned_u64 start;
	__aligned_u64 last;
};

/**
 * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
 * @size: sizeof(struct iommu_ioas_iova_ranges)
 * @ioas_id: IOAS ID to read ranges from
 * @num_iovas: Input/Output total number of ranges in the IOAS
 * @__reserved: Must be 0
 * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
 * @out_iova_alignment: Minimum alignment required for mapping IOVA
 *
 * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
 * is not allowed. num_iovas will be set to the total number of iovas and
 * the allowed_iovas[] will be filled in as space permits.
 *
 * The allowed ranges are dependent on the HW path the DMA operation takes, and
 * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
 * full range, and each attached device will narrow the ranges based on that
 * device's HW restrictions. Detaching a device can widen the ranges. Userspace
 * should query ranges after every attach/detach to know what IOVAs are valid
 * for mapping.
 *
 * On input num_iovas is the length of the allowed_iovas array. On output it is
 * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
 * num_iovas to the required value if num_iovas is too small. In this case the
 * caller should allocate a larger output array and re-issue the ioctl.
 *
 * out_iova_alignment returns the minimum IOVA alignment that can be given
 * to IOMMU_IOAS_MAP/COPY. IOVAs must satisfy::
 *
 *   starting_iova % out_iova_alignment == 0
 *   (starting_iova + length) % out_iova_alignment == 0
 *
 * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
 * be higher than the system PAGE_SIZE.
 */
struct iommu_ioas_iova_ranges {
	__u32 size;
	__u32 ioas_id;
	__u32 num_iovas;
	__u32 __reserved;
	__aligned_u64 allowed_iovas;
	__aligned_u64 out_iova_alignment;
};
#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)

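/*
 * Query sketch showing the -EMSGSIZE retry protocol described above
 * (illustrative only; iommufd and ioas_id are assumed, error handling and
 * realloc() failure checks are elided)::
 *
 *	struct iommu_iova_range *ranges = NULL;
 *	struct iommu_ioas_iova_ranges cmd = {
 *		.size = sizeof(cmd),
 *		.ioas_id = ioas_id,
 *	};
 *
 *	while (ioctl(iommufd, IOMMU_IOAS_IOVA_RANGES, &cmd) == -1 &&
 *	       errno == EMSGSIZE) {
 *		ranges = realloc(ranges, cmd.num_iovas * sizeof(*ranges));
 *		cmd.allowed_iovas = (__u64)(uintptr_t)ranges;
 *	}
 *
 * After success, cmd.num_iovas holds the count filled into ranges[] and
 * cmd.out_iova_alignment the minimum alignment for IOMMU_IOAS_MAP/COPY.
 */
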
/**
 * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
 * @size: sizeof(struct iommu_ioas_allow_iovas)
 * @ioas_id: IOAS ID to allow IOVAs from
 * @num_iovas: Input/Output total number of ranges in the IOAS
 * @__reserved: Must be 0
 * @allowed_iovas: Pointer to array of struct iommu_iova_range
 *
 * Ensure a range of IOVAs are always available for allocation. If this call
 * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
 * that are narrower than the ranges provided here. This call will fail if
 * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
 *
 * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
 * devices are attached the IOVA will narrow based on the device restrictions.
 * When an allowed range is specified any narrowing will be refused, i.e. device
 * attachment can fail if the device requires limiting within the allowed range.
 *
 * Automatic IOVA allocation is also impacted by this call. MAP will only
 * allocate within the allowed IOVAs if they are present.
 *
 * This call replaces the entire allowed list with the given list.
 */
struct iommu_ioas_allow_iovas {
	__u32 size;
	__u32 ioas_id;
	__u32 num_iovas;
	__u32 __reserved;
	__aligned_u64 allowed_iovas;
};
#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)

/**
 * enum iommufd_ioas_map_flags - Flags for map and copy
 * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
 *                             IOVA to place the mapping at
 * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
 * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
 */
enum iommufd_ioas_map_flags {
	IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
	IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
	IOMMU_IOAS_MAP_READABLE = 1 << 2,
};

/**
 * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
 * @size: sizeof(struct iommu_ioas_map)
 * @flags: Combination of enum iommufd_ioas_map_flags
 * @ioas_id: IOAS ID to change the mapping of
 * @__reserved: Must be 0
 * @user_va: Userspace pointer to start mapping from
 * @length: Number of bytes to map
 * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
 *        then this must be provided as input.
 *
 * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
 * mapping will be established at iova, otherwise a suitable location based on
 * the reserved and allowed lists will be automatically selected and returned
 * in iova.
 *
 * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
 * be unused; an existing IOVA cannot be replaced.
 */
struct iommu_ioas_map {
	__u32 size;
	__u32 flags;
	__u32 ioas_id;
	__u32 __reserved;
	__aligned_u64 user_va;
	__aligned_u64 length;
	__aligned_u64 iova;
};
#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)

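/*
 * Mapping sketch, assuming iommufd, ioas_id and a suitably aligned buffer
 * buf of buf_len bytes (illustrative; error handling elided)::
 *
 *	struct iommu_ioas_map map = {
 *		.size = sizeof(map),
 *		.flags = IOMMU_IOAS_MAP_READABLE | IOMMU_IOAS_MAP_WRITEABLE,
 *		.ioas_id = ioas_id,
 *		.user_va = (__u64)(uintptr_t)buf,
 *		.length = buf_len,
 *	};
 *
 *	ioctl(iommufd, IOMMU_IOAS_MAP, &map);
 *
 * Without IOMMU_IOAS_MAP_FIXED_IOVA the kernel picks the IOVA and returns it
 * in map.iova; with it, map.iova must first be set to an unused range.
 */
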
/**
 * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
 * @size: sizeof(struct iommu_ioas_map_file)
 * @flags: same as for iommu_ioas_map
 * @ioas_id: same as for iommu_ioas_map
 * @fd: the memfd to map
 * @start: byte offset from start of file to map from
 * @length: same as for iommu_ioas_map
 * @iova: same as for iommu_ioas_map
 *
 * Set an IOVA mapping from a memfd file. All other arguments and semantics
 * match those of IOMMU_IOAS_MAP.
 */
struct iommu_ioas_map_file {
	__u32 size;
	__u32 flags;
	__u32 ioas_id;
	__s32 fd;
	__aligned_u64 start;
	__aligned_u64 length;
	__aligned_u64 iova;
};
#define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE)

/**
 * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
 * @size: sizeof(struct iommu_ioas_copy)
 * @flags: Combination of enum iommufd_ioas_map_flags
 * @dst_ioas_id: IOAS ID to change the mapping of
 * @src_ioas_id: IOAS ID to copy from
 * @length: Number of bytes to copy and map
 * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
 *            set then this must be provided as input.
 * @src_iova: IOVA to start the copy
 *
 * Copy an already existing mapping from src_ioas_id and establish it in
 * dst_ioas_id. The src iova/length must exactly match a range used with
 * IOMMU_IOAS_MAP.
 *
 * This may be used to efficiently clone a subset of an IOAS to another, or as a
 * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
 * establishing equivalent new mappings, as internal resources are shared, and
 * the kernel will pin the user memory only once.
 */
struct iommu_ioas_copy {
	__u32 size;
	__u32 flags;
	__u32 dst_ioas_id;
	__u32 src_ioas_id;
	__aligned_u64 length;
	__aligned_u64 dst_iova;
	__aligned_u64 src_iova;
};
#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)

/**
 * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
 * @size: sizeof(struct iommu_ioas_unmap)
 * @ioas_id: IOAS ID to change the mapping of
 * @iova: IOVA to start the unmapping at
 * @length: Number of bytes to unmap, and return back the bytes unmapped
 *
 * Unmap an IOVA range. The iova/length must be a superset of a previously
 * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
 * truncating ranges is not allowed. The values 0 to U64_MAX will unmap
 * everything.
 */
struct iommu_ioas_unmap {
	__u32 size;
	__u32 ioas_id;
	__aligned_u64 iova;
	__aligned_u64 length;
};
#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)

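/*
 * Unmap sketch: tearing down every mapping in an IOAS uses the 0 to U64_MAX
 * range noted above (illustrative; error handling elided)::
 *
 *	struct iommu_ioas_unmap unmap = {
 *		.size = sizeof(unmap),
 *		.ioas_id = ioas_id,
 *		.iova = 0,
 *		.length = UINT64_MAX,
 *	};
 *
 *	ioctl(iommufd, IOMMU_IOAS_UNMAP, &unmap);
 *
 * On return, unmap.length holds the number of bytes actually unmapped.
 */
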
/**
 * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and
 *                       ioctl(IOMMU_OPTION_HUGE_PAGES)
 * @IOMMU_OPTION_RLIMIT_MODE:
 *    Change how RLIMIT_MEMLOCK accounting works. The caller must have
 *    privilege to invoke this. Value 0 (default) is user based accounting, 1
 *    uses process based accounting. Global option, object_id must be 0
 * @IOMMU_OPTION_HUGE_PAGES:
 *    Value 1 (default) allows contiguous pages to be combined when generating
 *    iommu mappings. Value 0 disables combining, everything is mapped to
 *    PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS
 *    option, the object_id must be the IOAS ID.
 */
enum iommufd_option {
	IOMMU_OPTION_RLIMIT_MODE = 0,
	IOMMU_OPTION_HUGE_PAGES = 1,
};

/**
 * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and
 *                           ioctl(IOMMU_OPTION_OP_GET)
 * @IOMMU_OPTION_OP_SET: Set the option's value
 * @IOMMU_OPTION_OP_GET: Get the option's value
 */
enum iommufd_option_ops {
	IOMMU_OPTION_OP_SET = 0,
	IOMMU_OPTION_OP_GET = 1,
};

/**
 * struct iommu_option - iommu option multiplexer
 * @size: sizeof(struct iommu_option)
 * @option_id: One of enum iommufd_option
 * @op: One of enum iommufd_option_ops
 * @__reserved: Must be 0
 * @object_id: ID of the object if required
 * @val64: Option value to set or value returned on get
 *
 * Change a simple option value. This multiplexer allows controlling options
 * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
 * will return the current value.
 */
struct iommu_option {
	__u32 size;
	__u32 option_id;
	__u16 op;
	__u16 __reserved;
	__u32 object_id;
	__aligned_u64 val64;
};
#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)

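/*
 * Option sketch: disabling huge page combining on one IOAS (illustrative;
 * iommufd and ioas_id are assumed, error handling elided)::
 *
 *	struct iommu_option opt = {
 *		.size = sizeof(opt),
 *		.option_id = IOMMU_OPTION_HUGE_PAGES,
 *		.op = IOMMU_OPTION_OP_SET,
 *		.object_id = ioas_id,
 *		.val64 = 0,
 *	};
 *
 *	ioctl(iommufd, IOMMU_OPTION, &opt);
 *
 * With .op = IOMMU_OPTION_OP_GET the current value comes back in opt.val64.
 */
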
/**
 * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls
 * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
 */
enum iommufd_vfio_ioas_op {
	IOMMU_VFIO_IOAS_GET = 0,
	IOMMU_VFIO_IOAS_SET = 1,
	IOMMU_VFIO_IOAS_CLEAR = 2,
};

/**
 * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
 * @size: sizeof(struct iommu_vfio_ioas)
 * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
 *           For IOMMU_VFIO_IOAS_GET will output the IOAS ID
 * @op: One of enum iommufd_vfio_ioas_op
 * @__reserved: Must be 0
 *
 * The VFIO compatibility support uses a single ioas because VFIO APIs do not
 * support the ID field. Set or get the IOAS that VFIO compatibility will use.
 * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
 * compatibility ioas, either by taking what is already set, or auto creating
 * one. From then on VFIO will continue to use that ioas and is not affected by
 * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
 */
struct iommu_vfio_ioas {
	__u32 size;
	__u32 ioas_id;
	__u16 op;
	__u16 __reserved;
};
#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)

/**
 * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation
 * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
 *                                the parent HWPT in a nesting configuration.
 * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
 *                                   enforced on device attachment
 * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
 *                             valid.
 * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
 *                          domain can be attached to any PASID on the device.
 *                          Any domain attached to the non-PASID part of the
 *                          device must also be flagged, otherwise attaching a
 *                          PASID will be blocked.
 *                          For users that want to attach a PASID, an ioas is
 *                          not recommended for either the non-PASID part or
 *                          the PASID part of the device.
 *                          If the IOMMU does not support PASID it will return
 *                          an error (-EOPNOTSUPP).
 */
enum iommufd_hwpt_alloc_flags {
	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
	IOMMU_HWPT_ALLOC_PASID = 1 << 3,
};

/**
 * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table
 *                                entry attributes
 * @IOMMU_VTD_S1_SRE: Supervisor request
 * @IOMMU_VTD_S1_EAFE: Extended access enable
 * @IOMMU_VTD_S1_WPE: Write protect enable
 */
enum iommu_hwpt_vtd_s1_flags {
	IOMMU_VTD_S1_SRE = 1 << 0,
	IOMMU_VTD_S1_EAFE = 1 << 1,
	IOMMU_VTD_S1_WPE = 1 << 2,
};

/**
 * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table
 *                            info (IOMMU_HWPT_DATA_VTD_S1)
 * @flags: Combination of enum iommu_hwpt_vtd_s1_flags
 * @pgtbl_addr: The base address of the stage-1 page table.
 * @addr_width: The address width of the stage-1 page table
 * @__reserved: Must be 0
 */
struct iommu_hwpt_vtd_s1 {
	__aligned_u64 flags;
	__aligned_u64 pgtbl_addr;
	__u32 addr_width;
	__u32 __reserved;
};

/**
 * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE
 *                                (IOMMU_HWPT_DATA_ARM_SMMUV3)
 *
 * @ste: The first two double words of the user space Stream Table Entry for
 *       the translation. Must be little-endian.
 *       Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
 *       - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
 *       - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
 *
 * -EIO will be returned if @ste is not legal or contains any non-allowed field.
 * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
 * nested domain will translate the same as the nesting parent. The S1 will
 * install a Context Descriptor Table pointing at userspace memory translated
 * by the nesting parent.
 *
 * It is suggested to allocate a vDEVICE object carrying the vSID and then
 * re-attach the nested domain, as soon as the vSID is available at the VMM
 * level:
 *
 * - when Cfg=translate, a vDEVICE must be allocated prior to attaching to the
 *   allocated nested domain, as CD/ATS invalidations and vevents need a vSID.
 * - when Cfg=bypass/abort, a vDEVICE is not enforced during the nested domain
 *   attachment, to support a GBPA case where the VM sets CR0.SMMUEN=0.
 *   However, if the VM sets CR0.SMMUEN=1 while missing a vDEVICE object, the
 *   kernel would fail to report events to the VM, e.g. F_TRANSLATION when
 *   guest STE.Cfg=abort.
 */
struct iommu_hwpt_arm_smmuv3 {
	__aligned_le64 ste[2];
};

/**
 * struct iommu_hwpt_amd_guest - AMD IOMMU guest I/O page table data
 *                               (IOMMU_HWPT_DATA_AMD_GUEST)
 * @dte: Guest Device Table Entry (DTE)
 */
struct iommu_hwpt_amd_guest {
	__aligned_u64 dte[4];
};

/**
 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
 * @IOMMU_HWPT_DATA_NONE: no data
 * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
 * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table
 * @IOMMU_HWPT_DATA_AMD_GUEST: AMD IOMMU guest page table
 */
enum iommu_hwpt_data_type {
	IOMMU_HWPT_DATA_NONE = 0,
	IOMMU_HWPT_DATA_VTD_S1 = 1,
	IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,
	IOMMU_HWPT_DATA_AMD_GUEST = 3,
};

/**
 * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
 * @size: sizeof(struct iommu_hwpt_alloc)
 * @flags: Combination of enum iommufd_hwpt_alloc_flags
 * @dev_id: The device to allocate this HWPT for
 * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to
 * @out_hwpt_id: The ID of the new HWPT
 * @__reserved: Must be 0
 * @data_type: One of enum iommu_hwpt_data_type
 * @data_len: Length of the type specific data
 * @data_uptr: User pointer to the type specific data
 * @fault_id: The ID of the IOMMUFD_FAULT object. Valid only if
 *            IOMMU_HWPT_FAULT_ID_VALID is set in @flags.
 * @__reserved2: Padding to 64-bit alignment. Must be 0.
 *
 * Explicitly allocate a hardware page table object. This is the same object
 * type that is returned by iommufd_device_attach() and represents the
 * underlying iommu driver's iommu_domain kernel object.
 *
 * A kernel-managed HWPT will be created with the mappings from the given
 * IOAS via the @pt_id. The @data_type for this allocation must be set to
 * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
 * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
 *
 * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a
 * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be
 * allocated previously via the same ioctl from a given IOAS (@pt_id). In this
 * case, the @data_type must be set to a pre-defined type corresponding to an
 * I/O page table type supported by the underlying IOMMU hardware. The device
 * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU
 * instance.
 *
 * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
 * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
 * must be given.
 */
struct iommu_hwpt_alloc {
	__u32 size;
	__u32 flags;
	__u32 dev_id;
	__u32 pt_id;
	__u32 out_hwpt_id;
	__u32 __reserved;
	__u32 data_type;
	__u32 data_len;
	__aligned_u64 data_uptr;
	__u32 fault_id;
	__u32 __reserved2;
};
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)

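/*
 * Allocation sketch: a kernel-managed HWPT over an IOAS, flagged so it can
 * later serve as a nesting parent (illustrative; iommufd, dev_id and ioas_id
 * are assumed, error handling elided)::
 *
 *	struct iommu_hwpt_alloc hwpt = {
 *		.size = sizeof(hwpt),
 *		.flags = IOMMU_HWPT_ALLOC_NEST_PARENT,
 *		.dev_id = dev_id,
 *		.pt_id = ioas_id,
 *		.data_type = IOMMU_HWPT_DATA_NONE,
 *	};
 *
 *	ioctl(iommufd, IOMMU_HWPT_ALLOC, &hwpt);
 *
 * A nested allocation would then pass hwpt.out_hwpt_id (or a vIOMMU ID) as
 * .pt_id together with driver-specific .data_type/.data_len/.data_uptr.
 */
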
/**
 * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info
 * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings
 *                                         on a nested_parent domain.
 *                                         https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
 */
enum iommu_hw_info_vtd_flags {
	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
};

/**
 * struct iommu_hw_info_vtd - Intel VT-d hardware information
 *
 * @flags: Combination of enum iommu_hw_info_vtd_flags
 * @__reserved: Must be 0
 *
 * @cap_reg: Value of the Intel VT-d capability register defined in VT-d spec
 *           section 11.4.2 Capability Register.
 * @ecap_reg: Value of the Intel VT-d extended capability register defined in
 *            VT-d spec section 11.4.3 Extended Capability Register.
 *
 * User needs to understand the Intel VT-d specification to decode the
 * register value.
 */
struct iommu_hw_info_vtd {
	__u32 flags;
	__u32 __reserved;
	__aligned_u64 cap_reg;
	__aligned_u64 ecap_reg;
};

/**
 * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
 *                                   (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
 *
 * @flags: Must be set to 0
 * @__reserved: Must be 0
 * @idr: Implemented features for ARM SMMU Non-secure programming interface
 * @iidr: Information about the implementation and implementer of ARM SMMU,
 *        and architecture version supported
 * @aidr: ARM SMMU architecture version
 *
 * For the details of @idr, @iidr and @aidr, please refer to the chapters
 * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
 *
 * This reports the raw HW capability, and not all bits are meaningful to be
 * read by userspace. Only the following fields should be used:
 *
 * idr[0]: ST_LEVEL, TERM_MODEL, STALL_MODEL, TTENDIAN, CD2L, ASID16, TTF
 * idr[1]: SIDSIZE, SSIDSIZE
 * idr[3]: BBML, RIL
 * idr[5]: VAX, GRAN64K, GRAN16K, GRAN4K
 *
 * - S1P should be assumed to be true if a NESTED HWPT can be created
 * - VFIO/iommufd only support platforms with COHACC; it should be assumed to
 *   be true.
 * - ATS is a per-device property. If the VMM describes any devices as ATS
 *   capable in ACPI/DT it should set the corresponding idr.
 *
 * This list may expand in the future (e.g. E0PD, AIE, PBHA, D128, DS etc). It
 * is important that VMMs do not read bits outside the list to allow for
 * compatibility with future kernels. Several features in the SMMUv3
 * architecture are not currently supported by the kernel for nesting: HTTU,
 * BTM, MPAM and others.
 */
struct iommu_hw_info_arm_smmuv3 {
	__u32 flags;
	__u32 __reserved;
	__u32 idr[6];
	__u32 iidr;
	__u32 aidr;
};

/**
 * struct iommu_hw_info_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Hardware
 *                                       Information (IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV)
 *
 * @flags: Must be 0
 * @version: Version number for the CMDQ-V HW for PARAM bits[03:00]
 * @log2vcmdqs: Log2 of the total number of VCMDQs for PARAM bits[07:04]
 * @log2vsids: Log2 of the total number of SID replacements for PARAM
 *             bits[15:12]
 * @__reserved: Must be 0
 *
 * The VMM can use these fields directly in its emulated global PARAM
 * register. Note that only one Virtual Interface (VINTF) should be exposed to
 * a VM, i.e. PARAM bits[11:08] should be set to 0 for log2 of the total
 * number of VINTFs.
 */
struct iommu_hw_info_tegra241_cmdqv {
	__u32 flags;
	__u8 version;
	__u8 log2vcmdqs;
	__u8 log2vsids;
	__u8 __reserved;
};

/**
 * struct iommu_hw_info_amd - AMD IOMMU device info
 *
 * @efr: Value of AMD IOMMU Extended Feature Register (EFR)
 * @efr2: Value of AMD IOMMU Extended Feature 2 Register (EFR2)
 *
 * Please see the description of these registers in the following sections of
 * the AMD I/O Virtualization Technology (IOMMU) Specification.
 * (https://docs.amd.com/v/u/en-US/48882_3.10_PUB)
 *
 * - MMIO Offset 0030h IOMMU Extended Feature Register
 * - MMIO Offset 01A0h IOMMU Extended Feature 2 Register
 *
 * Note: The EFR and EFR2 are raw values reported by hardware.
 * The VMM is responsible for determining the appropriate flags to be exposed
 * to the VM, since certain features are not currently supported by the kernel
 * for HW-vIOMMU.
 *
 * The current VMM-allowed list of feature flags is:
 * - EFR[GTSup, GASup, GioSup, PPRSup, EPHSup, GATS, GLX, PASmax]
 */
struct iommu_hw_info_amd {
	__aligned_u64 efr;
	__aligned_u64 efr2;
};

/**
 * enum iommu_hw_info_type - IOMMU Hardware Info Types
 * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware
 *                           info
 * @IOMMU_HW_INFO_TYPE_DEFAULT: Input to request for a default type
 * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
 * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
 * @IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
 *                                     SMMUv3) info type
 * @IOMMU_HW_INFO_TYPE_AMD: AMD IOMMU info type
 */
enum iommu_hw_info_type {
	IOMMU_HW_INFO_TYPE_NONE = 0,
	IOMMU_HW_INFO_TYPE_DEFAULT = 0,
	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
	IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
	IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3,
	IOMMU_HW_INFO_TYPE_AMD = 4,
};

/**
 * enum iommufd_hw_capabilities
 * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking
 *                               If available, it means the following APIs
 *                               are supported:
 *
 *                                   IOMMU_HWPT_GET_DIRTY_BITMAP
 *                                   IOMMU_HWPT_SET_DIRTY_TRACKING
 *
 * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported; the user ignores
 *                               it when struct iommu_hw_info::out_max_pasid_log2
 *                               is zero.
 * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported; the user ignores it
 *                               when struct iommu_hw_info::out_max_pasid_log2
 *                               is zero.
 */
enum iommufd_hw_capabilities {
	IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
	IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1,
	IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2,
};

/**
 * enum iommufd_hw_info_flags - Flags for iommu_hw_info
 * @IOMMU_HW_INFO_FLAG_INPUT_TYPE: If set, @in_data_type carries an input type
 *                                 for user space to request a specific info
 */
enum iommufd_hw_info_flags {
	IOMMU_HW_INFO_FLAG_INPUT_TYPE = 1 << 0,
};

/**
 * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
 * @size: sizeof(struct iommu_hw_info)
 * @flags: Must be 0
 * @dev_id: The device bound to the iommufd
 * @data_len: Input the length of a user buffer in bytes. Output the length of
 *            data that kernel supports
 * @data_uptr: User pointer to a user-space buffer used by the kernel to fill
 *             the iommu type specific hardware information data
 * @in_data_type: This shares the same field with @out_data_type, making it a
 *                bidirectional field. When IOMMU_HW_INFO_FLAG_INPUT_TYPE is
 *                set, the input type carried via this @in_data_type field is
 *                valid, requesting the info data of the given type. If
 *                IOMMU_HW_INFO_FLAG_INPUT_TYPE is unset, any input value will
 *                be seen as IOMMU_HW_INFO_TYPE_DEFAULT
 * @out_data_type: Output the iommu hardware info type as defined in the enum
 *                 iommu_hw_info_type.
 * @out_capabilities: Output the generic iommu capability info type as defined
 *                    in the enum iommu_hw_capabilities.
 * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support.
 *                      PCI devices should consult @out_capabilities to check
 *                      whether a specific capability is supported.
 * @__reserved: Must be 0
 *
 * Query iommu type specific hardware information from the iommu behind a
 * given device that has been bound to iommufd. This hardware info data will
 * be used to sync capabilities between the virtual iommu and the physical
 * iommu, e.g. a nested translation setup needs to check the hardware info, so
 * a guest stage-1 page table can be compatible with the physical iommu.
 *
 * To capture the iommu type specific hardware information data, @data_uptr and
 * its length @data_len must be provided. Trailing bytes will be zeroed if the
 * user buffer is larger than the data that kernel has. Otherwise, kernel only
 * fills the buffer using the given length in @data_len. If the ioctl succeeds,
 * @data_len will be updated to the length that kernel actually supports,
 * @out_data_type will be filled to decode the data filled in the buffer
 * pointed by @data_uptr. Input @data_len == zero is allowed.
 */
struct iommu_hw_info {
	__u32 size;
	__u32 flags;
	__u32 dev_id;
	__u32 data_len;
	__aligned_u64 data_uptr;
	union {
		__u32 in_data_type;
		__u32 out_data_type;
	};
	__u8 out_max_pasid_log2;
	__u8 __reserved[3];
	__aligned_u64 out_capabilities;
};
#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)

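/*
 * Query sketch for an Intel VT-d backed device, assuming iommufd and a
 * dev_id bound to it (illustrative; error handling elided)::
 *
 *	struct iommu_hw_info_vtd vtd;
 *	struct iommu_hw_info info = {
 *		.size = sizeof(info),
 *		.dev_id = dev_id,
 *		.data_len = sizeof(vtd),
 *		.data_uptr = (__u64)(uintptr_t)&vtd,
 *	};
 *
 *	ioctl(iommufd, IOMMU_GET_HW_INFO, &info);
 *	if (info.out_data_type == IOMMU_HW_INFO_TYPE_INTEL_VTD)
 *		decode_vtd_caps(vtd.cap_reg, vtd.ecap_reg);
 *
 * decode_vtd_caps() is a hypothetical helper; the generic
 * info.out_capabilities can also be tested directly, e.g. against
 * IOMMU_HW_CAP_DIRTY_TRACKING.
 */
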
/**
 * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty
 *                                              tracking
 * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking
 */
enum iommufd_hwpt_set_dirty_tracking_flags {
	IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1,
};

/**
 * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
 * @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
 * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
 * @__reserved: Must be 0
 *
 * Toggle dirty tracking on an HW pagetable.
 */
struct iommu_hwpt_set_dirty_tracking {
	__u32 size;
	__u32 flags;
	__u32 hwpt_id;
	__u32 __reserved;
};
#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
					  IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)

/**
 * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
 * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
 *                                        any dirty bits metadata. This flag
 *                                        can be passed when the next operation
 *                                        on the same IOVA range is expected to
 *                                        be an unmap.
 */
enum iommufd_hwpt_get_dirty_bitmap_flags {
	IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1,
};

/**
 * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
 * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
 * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
 * @__reserved: Must be 0
 * @iova: base IOVA of the bitmap first bit
 * @length: IOVA range size
 * @page_size: page size granularity of each bit in the bitmap
 * @data: bitmap in which to set the dirty bits. Each bit of the bitmap
 *        represents one page_size of IOVA, offset from @iova.
 *
 * Checking whether a given IOVA is dirty::
 *
 *   data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
 *
 * Walk the IOMMU pagetables for a given IOVA range to return a bitmap
 * with the dirty IOVAs. In doing so it will also by default clear any
 * dirty bit metadata set in the IOPTE.
 */
struct iommu_hwpt_get_dirty_bitmap {
	__u32 size;
	__u32 hwpt_id;
	__u32 flags;
	__u32 __reserved;
	__aligned_u64 iova;
	__aligned_u64 length;
	__aligned_u64 page_size;
	__aligned_u64 data;
};
#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
					IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)

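/*
 * Dirty tracking sketch using the two ioctls above, assuming a hwpt_id
 * allocated with IOMMU_HWPT_ALLOC_DIRTY_TRACKING and a caller-sized bitmap
 * covering length / page_size bits (illustrative; error handling elided)::
 *
 *	struct iommu_hwpt_set_dirty_tracking set = {
 *		.size = sizeof(set),
 *		.flags = IOMMU_HWPT_DIRTY_TRACKING_ENABLE,
 *		.hwpt_id = hwpt_id,
 *	};
 *	struct iommu_hwpt_get_dirty_bitmap get = {
 *		.size = sizeof(get),
 *		.hwpt_id = hwpt_id,
 *		.iova = iova,
 *		.length = length,
 *		.page_size = 4096,
 *		.data = (__u64)(uintptr_t)bitmap,
 *	};
 *
 *	ioctl(iommufd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set);
 *	ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get);
 *
 * Individual IOVAs are then tested with the data[] formula from the
 * kernel-doc above.
 */
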
/**
 * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
 *                                        Data Type
 * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
 * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3
 */
enum iommu_hwpt_invalidate_data_type {
	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
	IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1,
};

/**
 * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
 *                                           stage-1 cache invalidation
 * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
 *                            to all-levels page structure cache or just
 *                            the leaf PTE cache.
 */
enum iommu_hwpt_vtd_s1_invalidate_flags {
	IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
};

/**
 * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
 *                                       (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
 * @addr: The start address of the range to be invalidated. It needs to
 *        be 4KB aligned.
 * @npages: Number of contiguous 4K pages to be invalidated.
 * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
 * @__reserved: Must be 0
 *
 * The Intel VT-d specific invalidation data for user-managed stage-1 cache
 * invalidation in nested translation. Userspace uses this structure to
 * tell the impacted cache scope after modifying the stage-1 page table.
 *
 * Invalidate all the caches related to the page table by setting @addr
 * to 0 and @npages to U64_MAX.
 *
 * The device TLB will be invalidated automatically if ATS is enabled.
 */
struct iommu_hwpt_vtd_s1_invalidate {
	__aligned_u64 addr;
	__aligned_u64 npages;
	__u32 flags;
	__u32 __reserved;
};

/**
 * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
 *                                             (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
 * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
 *       Must be little-endian.
 *
 * Supported command list only when passing in a vIOMMU via @hwpt_id:
 *
 *     CMDQ_OP_TLBI_NSNH_ALL
 *     CMDQ_OP_TLBI_NH_VA
 *     CMDQ_OP_TLBI_NH_VAA
 *     CMDQ_OP_TLBI_NH_ALL
 *     CMDQ_OP_TLBI_NH_ASID
 *     CMDQ_OP_ATC_INV
 *     CMDQ_OP_CFGI_CD
 *     CMDQ_OP_CFGI_CD_ALL
 *
 * -EIO will be returned if the command is not supported.
 */
struct iommu_viommu_arm_smmuv3_invalidate {
	__aligned_le64 cmd[2];
};

/**
 * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
 * @size: sizeof(struct iommu_hwpt_invalidate)
 * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation
 * @data_uptr: User pointer to an array of driver-specific cache invalidation
 *             data.
 * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
 *             type of all the entries in the invalidation request array. It
 *             should be a type supported by the hwpt pointed to by @hwpt_id.
 * @entry_len: Length (in bytes) of a request entry in the request array
 * @entry_num: Input the number of cache invalidation requests in the array.
 *             Output the number of requests successfully handled by kernel.
 * @__reserved: Must be 0.
 *
 * Invalidate the iommu cache for a user-managed page table or vIOMMU.
 * Modifications on a user-managed page table should be followed by this
 * operation, if a HWPT is passed in via @hwpt_id. Other caches, such as the
 * device cache or the descriptor cache, can be flushed if a vIOMMU is passed
 * in via the @hwpt_id field.
 *
 * Each ioctl can support one or more cache invalidation requests in the array
 * that has a total size of @entry_len * @entry_num.
 *
 * An empty invalidation request array, i.e. @entry_num == 0, is allowed; in
 * this case @entry_len and @data_uptr are ignored. This can be used to check
 * whether the given @data_type is supported by the kernel.
 */
struct iommu_hwpt_invalidate {
	__u32 size;
	__u32 hwpt_id;
	__aligned_u64 data_uptr;
	__u32 data_type;
	__u32 entry_len;
	__u32 entry_num;
	__u32 __reserved;
};
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)

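/*
 * Invalidation sketch for a VT-d stage-1 nested HWPT, batching two requests
 * in one array (illustrative; nested_hwpt_id and the base addresses are
 * assumed, error handling elided)::
 *
 *	struct iommu_hwpt_vtd_s1_invalidate reqs[2] = {
 *		{ .addr = base0, .npages = 16 },
 *		{ .addr = base1, .npages = 1,
 *		  .flags = IOMMU_VTD_INV_FLAGS_LEAF },
 *	};
 *	struct iommu_hwpt_invalidate cmd = {
 *		.size = sizeof(cmd),
 *		.hwpt_id = nested_hwpt_id,
 *		.data_uptr = (__u64)(uintptr_t)reqs,
 *		.data_type = IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
 *		.entry_len = sizeof(reqs[0]),
 *		.entry_num = 2,
 *	};
 *
 *	ioctl(iommufd, IOMMU_HWPT_INVALIDATE, &cmd);
 *
 * On return, cmd.entry_num holds how many entries were handled.
 */
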
/**
 * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
 *                                   valid.
 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
 */
enum iommu_hwpt_pgfault_flags {
	IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0),
	IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1),
};

/**
 * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_PERM_READ: request for read permission
 * @IOMMU_PGFAULT_PERM_WRITE: request for write permission
 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
 *                           Execute Requested bit set in PASID TLP Prefix.
 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
 *                           Privileged Mode Requested bit set in PASID TLP
 *                           Prefix.
 */
enum iommu_hwpt_pgfault_perm {
	IOMMU_PGFAULT_PERM_READ = (1 << 0),
	IOMMU_PGFAULT_PERM_WRITE = (1 << 1),
	IOMMU_PGFAULT_PERM_EXEC = (1 << 2),
	IOMMU_PGFAULT_PERM_PRIV = (1 << 3),
};

/**
 * struct iommu_hwpt_pgfault - iommu page fault data
 * @flags: Combination of enum iommu_hwpt_pgfault_flags
 * @dev_id: ID of the originating device
 * @pasid: Process Address Space ID
 * @grpid: Page Request Group Index
 * @perm: Combination of enum iommu_hwpt_pgfault_perm
 * @__reserved: Must be 0.
 * @addr: Fault address
 * @length: a hint of how much data the requestor is expecting to fetch. For
 *          example, if the PRI initiator knows it is going to do a 10MB
 *          transfer, it could fill in 10MB and the OS could pre-fault in
 *          10MB of IOVA. It defaults to 0 if there is no such hint.
 * @cookie: kernel-managed cookie identifying a group of fault messages. The
 *          cookie number encoded in the last page fault of the group should
 *          be echoed back in the response message.
 */
struct iommu_hwpt_pgfault {
	__u32 flags;
	__u32 dev_id;
	__u32 pasid;
	__u32 grpid;
	__u32 perm;
	__u32 __reserved;
	__aligned_u64 addr;
	__u32 length;
	__u32 cookie;
};

/**
 * enum iommufd_page_response_code - Return status of fault handlers
 * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
 *                             populated, retry the access. This is the
 *                             "Success" defined in PCI 10.4.2.1.
 * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
 *                             access. This is the "Invalid Request" in PCI
 *                             10.4.2.1.
 */
enum iommufd_page_response_code {
	IOMMUFD_PAGE_RESP_SUCCESS = 0,
	IOMMUFD_PAGE_RESP_INVALID = 1,
};

/**
 * struct iommu_hwpt_page_response - IOMMU page fault response
 * @cookie: The kernel-managed cookie reported in the fault message.
 * @code: One of the response codes in enum iommufd_page_response_code.
 */
struct iommu_hwpt_page_response {
	__u32 cookie;
	__u32 code;
};

/**
 * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
 * @size: sizeof(struct iommu_fault_alloc)
 * @flags: Must be 0
 * @out_fault_id: The ID of the new FAULT
 * @out_fault_fd: The fd of the new FAULT
 *
 * Explicitly allocate a fault handling object.
 */
struct iommu_fault_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_fault_id;
	__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)

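/*
 * Fault handling sketch: out_fault_fd is read for struct iommu_hwpt_pgfault
 * records and written with struct iommu_hwpt_page_response replies
 * (illustrative; fault_fd is assumed, error handling elided)::
 *
 *	struct iommu_hwpt_pgfault fault;
 *
 *	read(fault_fd, &fault, sizeof(fault));
 *	if (fault.flags & IOMMU_PGFAULT_FLAGS_LAST_PAGE) {
 *		struct iommu_hwpt_page_response resp = {
 *			.cookie = fault.cookie,
 *			.code = IOMMUFD_PAGE_RESP_SUCCESS,
 *		};
 *
 *		write(fault_fd, &resp, sizeof(resp));
 *	}
 *
 * The cookie echoed back is the one from the last fault of the group, per
 * the struct iommu_hwpt_pgfault kernel-doc above.
 */
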
/**
 * enum iommu_viommu_type - Virtual IOMMU Type
 * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
 * @IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
 *                                    SMMUv3) enabled ARM SMMUv3 type
 */
enum iommu_viommu_type {
	IOMMU_VIOMMU_TYPE_DEFAULT = 0,
	IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
	IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2,
};

/**
 * struct iommu_viommu_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Virtual Interface
 *                                      (IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV)
 * @out_vintf_mmap_offset: mmap offset argument for VINTF's page0
 * @out_vintf_mmap_length: mmap length argument for VINTF's page0
 *
 * Both @out_vintf_mmap_offset and @out_vintf_mmap_length are reported by the
 * kernel for user space to mmap the VINTF page0 from the host physical address
 * space into the guest physical address space, so that a guest kernel can
 * directly read/write the VINTF page0 in order to control its virtual command
 * queues.
 */
struct iommu_viommu_tegra241_cmdqv {
	__aligned_u64 out_vintf_mmap_offset;
	__aligned_u64 out_vintf_mmap_length;
};

/**
 * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
 * @size: sizeof(struct iommu_viommu_alloc)
 * @flags: Must be 0
 * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
 * @dev_id: The device whose physical IOMMU will be used to back the virtual
 *          IOMMU
 * @hwpt_id: ID of a nesting parent HWPT to associate to
 * @out_viommu_id: Output virtual IOMMU ID for the allocated object
 * @data_len: Length of the type specific data
 * @__reserved: Must be 0
 * @data_uptr: User pointer to a driver-specific virtual IOMMU data
 *
 * Allocate a virtual IOMMU object, representing the underlying physical
 * IOMMU's virtualization support: a security-isolated slice of the real IOMMU
 * HW that is unique to a specific VM. Operations global to the IOMMU are
 * connected to the vIOMMU, such as:
 * - Security namespace for guest owned ID, e.g. guest-controlled cache tags
 * - Non-device-affiliated event reporting, e.g. invalidation queue errors
 * - Access to a sharable nesting parent pagetable across physical IOMMUs
 * - Virtualization of various platform IDs, e.g. RIDs and others
 * - Delivery of paravirtualized invalidation
 * - Direct assigned invalidation queues
 * - Direct assigned interrupts
 */
struct iommu_viommu_alloc {
	__u32 size;
	__u32 flags;
	__u32 type;
	__u32 dev_id;
	__u32 hwpt_id;
	__u32 out_viommu_id;
	__u32 data_len;
	__u32 __reserved;
	__aligned_u64 data_uptr;
};
#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)

/**
 * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC)
 * @size: sizeof(struct iommu_vdevice_alloc)
 * @viommu_id: vIOMMU ID to associate with the virtual device
 * @dev_id: The physical device to allocate a virtual instance on the vIOMMU
 * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTROY
 * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID
 *           of AMD IOMMU, and vRID of Intel VT-d
 *
 * Allocate a virtual device instance (for a physical device) against a vIOMMU.
 * This instance holds the device's information (related to its vIOMMU) in a VM.
 * Userspace should use IOMMU_DESTROY to destroy the virtual device before
 * destroying the physical device (by closing the vfio_cdev fd). Otherwise the
 * virtual device is forcibly destroyed on physical device destruction, and its
 * vdevice_id is permanently leaked (unremovable & unreusable) until the
 * iommufd is closed.
 */
struct iommu_vdevice_alloc {
	__u32 size;
	__u32 viommu_id;
	__u32 dev_id;
	__u32 out_vdevice_id;
	__aligned_u64 virt_id;
};
#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)

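/*
 * Allocation sketch chaining a vIOMMU and a vDevice on ARM (illustrative;
 * dev_id, a nesting parent nesting_hwpt_id, and the guest-visible vsid are
 * assumed, error handling elided)::
 *
 *	struct iommu_viommu_alloc viommu = {
 *		.size = sizeof(viommu),
 *		.type = IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
 *		.dev_id = dev_id,
 *		.hwpt_id = nesting_hwpt_id,
 *	};
 *	struct iommu_vdevice_alloc vdev = {
 *		.size = sizeof(vdev),
 *		.dev_id = dev_id,
 *		.virt_id = vsid,
 *	};
 *
 *	ioctl(iommufd, IOMMU_VIOMMU_ALLOC, &viommu);
 *	vdev.viommu_id = viommu.out_viommu_id;
 *	ioctl(iommufd, IOMMU_VDEVICE_ALLOC, &vdev);
 */
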
/**
 * struct iommu_ioas_change_process - ioctl(IOMMU_IOAS_CHANGE_PROCESS)
 * @size: sizeof(struct iommu_ioas_change_process)
 * @__reserved: Must be 0
 *
 * This transfers pinned memory counts for every memory map in every IOAS
 * in the context to the current process. This only supports maps created
 * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present.
 * If the ioctl returns a failure status, then nothing is changed.
 *
 * This API is useful for transferring operation of a device from one process
 * to another, such as during userland live update.
 */
struct iommu_ioas_change_process {
	__u32 size;
	__u32 __reserved;
};

#define IOMMU_IOAS_CHANGE_PROCESS \
	_IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)

/**
 * enum iommu_veventq_flag - flag for struct iommufd_vevent_header
 * @IOMMU_VEVENTQ_FLAG_LOST_EVENTS: vEVENTQ has lost vEVENTs
 */
enum iommu_veventq_flag {
	IOMMU_VEVENTQ_FLAG_LOST_EVENTS = (1U << 0),
};

/**
 * struct iommufd_vevent_header - Virtual Event Header for a vEVENTQ Status
 * @flags: Combination of enum iommu_veventq_flag
 * @sequence: The sequence index of a vEVENT in the vEVENTQ, with a range of
 *            [0, INT_MAX] where the following index of INT_MAX is 0
 *
 * Each iommufd_vevent_header reports the sequence index of the vEVENT that
 * follows it:
 *
 * +----------------------+-------+----------------------+-------+---+-------+
 * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN |
 * +----------------------+-------+----------------------+-------+---+-------+
 *
 * This sequence index is expected to increase monotonically from the sequence
 * index of the previous vEVENT. If two adjacent sequence indexes have a delta
 * larger than 1, it means that delta - 1 vEVENTs have been lost, e.g. two lost
 * vEVENTs:
 *
 * +-----+----------------------+-------+----------------------+-------+-----+
 * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... |
 * +-----+----------------------+-------+----------------------+-------+-----+
 *
 * If a vEVENT is lost at the tail of the vEVENTQ and there is no following
 * vEVENT providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS
 * header would be added to the tail, and no data would follow this header:
 *
 * +--+----------------------+-------+-----------------------------------------+
 * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} |
 * +--+----------------------+-------+-----------------------------------------+
 */
struct iommufd_vevent_header {
	__u32 flags;
	__u32 sequence;
};

/**
 * enum iommu_veventq_type - Virtual Event Queue Type
 * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue
 * @IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV Extension IRQ
 */
enum iommu_veventq_type {
	IOMMU_VEVENTQ_TYPE_DEFAULT = 0,
	IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1,
	IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV = 2,
};

/**
 * struct iommu_vevent_arm_smmuv3 - ARM SMMUv3 Virtual Event
 *                                  (IOMMU_VEVENTQ_TYPE_ARM_SMMUV3)
 * @evt: 256-bit ARM SMMUv3 Event record, little-endian.
 *       Reported event records: (Refer to "7.3 Event records" in SMMUv3 HW
 *       Spec)
 *       - 0x04 C_BAD_STE
 *       - 0x06 F_STREAM_DISABLED
 *       - 0x08 C_BAD_SUBSTREAMID
 *       - 0x0a C_BAD_CD
 *       - 0x10 F_TRANSLATION
 *       - 0x11 F_ADDR_SIZE
 *       - 0x12 F_ACCESS
 *       - 0x13 F_PERMISSION
 *
 * The StreamID field reports a virtual device ID. To receive a virtual event
 * for a device, a vDEVICE must be allocated via IOMMU_VDEVICE_ALLOC.
 */
struct iommu_vevent_arm_smmuv3 {
	__aligned_le64 evt[4];
};

/**
 * struct iommu_vevent_tegra241_cmdqv - Tegra241 CMDQV IRQ
 *                                      (IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV)
 * @lvcmdq_err_map: 128-bit logical vcmdq error map, little-endian.
 *                  (Refer to the LVCMDQ_ERR_MAP registers per VINTF)
 *
 * The 128-bit register value from HW exclusively reflects the error bits for a
 * Virtual Interface represented by a vIOMMU object. Read and report directly.
 */
struct iommu_vevent_tegra241_cmdqv {
	__aligned_le64 lvcmdq_err_map[2];
};

/**
 * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC)
 * @size: sizeof(struct iommu_veventq_alloc)
 * @flags: Must be 0
 * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with
 * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type
 * @veventq_depth: Maximum number of events in the vEVENTQ
 * @out_veventq_id: The ID of the new vEVENTQ
 * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the
 *                  successfully returned fd after using it
 * @__reserved: Must be 0
 *
 * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU
 * can have multiple FDs for different types, but is confined to one per @type.
 * User space should open the @out_veventq_fd to read vEVENTs out of a vEVENTQ,
 * if there are vEVENTs available. A vEVENTQ will lose events on overflow, if
 * the number of queued vEVENTs hits @veventq_depth.
 *
 * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by
 * a type-specific data structure, in a normal case:
 *
 * +-+---------+-------+---------+-------+-----+---------+-------+-+
 * | | header0 | data0 | header1 | data1 | ... | headerN | dataN | |
 * +-+---------+-------+---------+-------+-----+---------+-------+-+
 *
 * unless a trailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to
 * struct iommufd_vevent_header).
 */
struct iommu_veventq_alloc {
	__u32 size;
	__u32 flags;
	__u32 viommu_id;
	__u32 type;
	__u32 veventq_depth;
	__u32 out_veventq_id;
	__u32 out_veventq_fd;
	__u32 __reserved;
};
#define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC)

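/*
 * Read sketch for an ARM SMMUv3 vEVENTQ: each vEVENT is a header followed
 * by type-specific data, except for a trailing LOST_EVENTS header
 * (illustrative; veventq_fd is assumed, error handling elided)::
 *
 *	char buf[4096];
 *	ssize_t len = read(veventq_fd, buf, sizeof(buf));
 *	char *p = buf;
 *
 *	while (len >= (ssize_t)sizeof(struct iommufd_vevent_header)) {
 *		struct iommufd_vevent_header *hdr = (void *)p;
 *		size_t step = sizeof(*hdr);
 *
 *		if (!(hdr->flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS))
 *			step += sizeof(struct iommu_vevent_arm_smmuv3);
 *		p += step;
 *		len -= step;
 *	}
 *
 * Gaps between adjacent hdr->sequence values indicate dropped vEVENTs, per
 * the struct iommufd_vevent_header kernel-doc above.
 */
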
/**
 * enum iommu_hw_queue_type - HW Queue Type
 * @IOMMU_HW_QUEUE_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
 *                                      SMMUv3) Virtual Command Queue (VCMDQ)
 */
enum iommu_hw_queue_type {
	IOMMU_HW_QUEUE_TYPE_DEFAULT = 0,
	/*
	 * TEGRA241_CMDQV requirements (otherwise, allocation will fail)
	 * - alloc starts from the lowest @index=0 in ascending order
	 * - destroy starts from the last allocated @index in descending order
	 * - @nesting_parent_iova must be aligned to @length in bytes and
	 *   mapped in the IOAS
	 * - @length must be a power of 2, with a minimum of 32 bytes and a
	 *   maximum of 2 ^ idr[1].CMDQS * 16 bytes (use the GET_HW_INFO call
	 *   to read idr[1] from struct iommu_hw_info_arm_smmuv3)
	 * - it is suggested to back the queue memory with contiguous physical
	 *   pages or a single huge page with alignment of the queue size, and
	 *   to limit the emulated vSMMU's IDR1.CMDQS to
	 *   log2(huge page size / 16 bytes)
	 */
	IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV = 1,
};

/**
 * struct iommu_hw_queue_alloc - ioctl(IOMMU_HW_QUEUE_ALLOC)
 * @size: sizeof(struct iommu_hw_queue_alloc)
 * @flags: Must be 0
 * @viommu_id: Virtual IOMMU ID to associate the HW queue with
 * @type: One of enum iommu_hw_queue_type
 * @index: The logical index to the HW queue per virtual IOMMU for a
 *         multi-queue model
 * @out_hw_queue_id: The ID of the new HW queue
 * @nesting_parent_iova: Base address of the queue memory in the guest physical
 *                       address space
 * @length: Length of the queue memory
 *
 * Allocate a HW queue object for a vIOMMU-specific HW-accelerated queue, which
 * allows HW to access a guest queue memory described using
 * @nesting_parent_iova and @length.
 *
 * A vIOMMU can allocate multiple queues, but it must use a different @index
 * per type to separate each allocation, e.g.::
 *
 *     Type1 HW queue0, Type1 HW queue1, Type2 HW queue0, ...
 */
struct iommu_hw_queue_alloc {
	__u32 size;
	__u32 flags;
	__u32 viommu_id;
	__u32 type;
	__u32 index;
	__u32 out_hw_queue_id;
	__aligned_u64 nesting_parent_iova;
	__aligned_u64 length;
};
#define IOMMU_HW_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HW_QUEUE_ALLOC)
#endif