Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at master 4137 lines 113 kB view raw
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 */ 6 7#define pr_fmt(fmt) "iommu: " fmt 8 9#include <linux/amba/bus.h> 10#include <linux/device.h> 11#include <linux/kernel.h> 12#include <linux/bits.h> 13#include <linux/bug.h> 14#include <linux/types.h> 15#include <linux/init.h> 16#include <linux/export.h> 17#include <linux/slab.h> 18#include <linux/errno.h> 19#include <linux/host1x_context_bus.h> 20#include <linux/iommu.h> 21#include <linux/iommufd.h> 22#include <linux/idr.h> 23#include <linux/err.h> 24#include <linux/pci.h> 25#include <linux/pci-ats.h> 26#include <linux/bitops.h> 27#include <linux/platform_device.h> 28#include <linux/property.h> 29#include <linux/fsl/mc.h> 30#include <linux/module.h> 31#include <linux/cc_platform.h> 32#include <linux/cdx/cdx_bus.h> 33#include <trace/events/iommu.h> 34#include <linux/sched/mm.h> 35#include <linux/msi.h> 36#include <uapi/linux/iommufd.h> 37#include <linux/generic_pt/iommu.h> 38 39#include "dma-iommu.h" 40#include "iommu-priv.h" 41 42static struct kset *iommu_group_kset; 43static DEFINE_IDA(iommu_group_ida); 44static DEFINE_IDA(iommu_global_pasid_ida); 45 46static unsigned int iommu_def_domain_type __read_mostly; 47static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); 48static u32 iommu_cmd_line __read_mostly; 49 50/* Tags used with xa_tag_pointer() in group->pasid_array */ 51enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 }; 52 53struct iommu_group { 54 struct kobject kobj; 55 struct kobject *devices_kobj; 56 struct list_head devices; 57 struct xarray pasid_array; 58 struct mutex mutex; 59 void *iommu_data; 60 void (*iommu_data_release)(void *iommu_data); 61 char *name; 62 int id; 63 struct iommu_domain *default_domain; 64 struct iommu_domain *blocking_domain; 65 /* 66 * During a group device reset, @resetting_domain points to the physical 67 * domain, while @domain points to the attached domain before the reset. 68 */ 69 struct iommu_domain *resetting_domain; 70 struct iommu_domain *domain; 71 struct list_head entry; 72 unsigned int owner_cnt; 73 void *owner; 74}; 75 76struct group_device { 77 struct list_head list; 78 struct device *dev; 79 char *name; 80}; 81 82/* Iterate over each struct group_device in a struct iommu_group */ 83#define for_each_group_device(group, pos) \ 84 list_for_each_entry(pos, &(group)->devices, list) 85 86struct iommu_group_attribute { 87 struct attribute attr; 88 ssize_t (*show)(struct iommu_group *group, char *buf); 89 ssize_t (*store)(struct iommu_group *group, 90 const char *buf, size_t count); 91}; 92 93static const char * const iommu_group_resv_type_string[] = { 94 [IOMMU_RESV_DIRECT] = "direct", 95 [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable", 96 [IOMMU_RESV_RESERVED] = "reserved", 97 [IOMMU_RESV_MSI] = "msi", 98 [IOMMU_RESV_SW_MSI] = "msi", 99}; 100 101#define IOMMU_CMD_LINE_DMA_API BIT(0) 102#define IOMMU_CMD_LINE_STRICT BIT(1) 103 104static int bus_iommu_probe(const struct bus_type *bus); 105static int iommu_bus_notifier(struct notifier_block *nb, 106 unsigned long action, void *data); 107static void iommu_release_device(struct device *dev); 108static int __iommu_attach_device(struct iommu_domain *domain, 109 struct device *dev, struct iommu_domain *old); 110static int __iommu_attach_group(struct iommu_domain *domain, 111 struct iommu_group *group); 112static struct iommu_domain *__iommu_paging_domain_alloc_flags(struct device *dev, 113 unsigned int type, 114 unsigned int flags); 115 116enum { 117 IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0, 118}; 119 120static int __iommu_device_set_domain(struct iommu_group *group, 121 struct device *dev, 122 struct iommu_domain *new_domain, 123 struct iommu_domain *old_domain, 124 unsigned int flags); 125static int __iommu_group_set_domain_internal(struct iommu_group *group, 126 struct iommu_domain *new_domain, 127 unsigned int flags); 128static int __iommu_group_set_domain(struct iommu_group *group, 129 struct iommu_domain *new_domain) 130{ 131 return __iommu_group_set_domain_internal(group, new_domain, 0); 132} 133static void __iommu_group_set_domain_nofail(struct iommu_group *group, 134 struct iommu_domain *new_domain) 135{ 136 WARN_ON(__iommu_group_set_domain_internal( 137 group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED)); 138} 139 140static int iommu_setup_default_domain(struct iommu_group *group, 141 int target_type); 142static int iommu_create_device_direct_mappings(struct iommu_domain *domain, 143 struct device *dev); 144static ssize_t iommu_group_store_type(struct iommu_group *group, 145 const char *buf, size_t count); 146static struct group_device *iommu_group_alloc_device(struct iommu_group *group, 147 struct device *dev); 148static void __iommu_group_free_device(struct iommu_group *group, 149 struct group_device *grp_dev); 150static void iommu_domain_init(struct iommu_domain *domain, unsigned int type, 151 const struct iommu_ops *ops); 152 153#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ 154struct iommu_group_attribute iommu_group_attr_##_name = \ 155 __ATTR(_name, _mode, _show, _store) 156 157#define to_iommu_group_attr(_attr) \ 158 container_of(_attr, struct iommu_group_attribute, attr) 159#define to_iommu_group(_kobj) \ 160 container_of(_kobj, struct iommu_group, kobj) 161 162static LIST_HEAD(iommu_device_list); 163static DEFINE_SPINLOCK(iommu_device_lock); 164 165static const struct bus_type * const iommu_buses[] = { 166 &platform_bus_type, 167#ifdef CONFIG_PCI 168 &pci_bus_type, 169#endif 170#ifdef CONFIG_ARM_AMBA 171 &amba_bustype, 172#endif 173#ifdef CONFIG_FSL_MC_BUS 174 &fsl_mc_bus_type, 175#endif 176#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS 177 &host1x_context_device_bus_type, 178#endif 179#ifdef CONFIG_CDX_BUS 180 &cdx_bus_type, 181#endif 182}; 183 184/* 185 * Use a function instead of an array here because the domain-type is a 186 * bit-field, so an array would waste memory. 187 */ 188static const char *iommu_domain_type_str(unsigned int t) 189{ 190 switch (t) { 191 case IOMMU_DOMAIN_BLOCKED: 192 return "Blocked"; 193 case IOMMU_DOMAIN_IDENTITY: 194 return "Passthrough"; 195 case IOMMU_DOMAIN_UNMANAGED: 196 return "Unmanaged"; 197 case IOMMU_DOMAIN_DMA: 198 case IOMMU_DOMAIN_DMA_FQ: 199 return "Translated"; 200 case IOMMU_DOMAIN_PLATFORM: 201 return "Platform"; 202 default: 203 return "Unknown"; 204 } 205} 206 207static int __init iommu_subsys_init(void) 208{ 209 struct notifier_block *nb; 210 211 if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { 212 if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) 213 iommu_set_default_passthrough(false); 214 else 215 iommu_set_default_translated(false); 216 217 if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { 218 pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n"); 219 iommu_set_default_translated(false); 220 } 221 } 222 223 if (!iommu_default_passthrough() && !iommu_dma_strict) 224 iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; 225 226 pr_info("Default domain type: %s%s\n", 227 iommu_domain_type_str(iommu_def_domain_type), 228 (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? 229 " (set via kernel command line)" : ""); 230 231 if (!iommu_default_passthrough()) 232 pr_info("DMA domain TLB invalidation policy: %s mode%s\n", 233 iommu_dma_strict ? "strict" : "lazy", 234 (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? 235 " (set via kernel command line)" : ""); 236 237 nb = kzalloc_objs(*nb, ARRAY_SIZE(iommu_buses)); 238 if (!nb) 239 return -ENOMEM; 240 241 iommu_debug_init(); 242 243 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { 244 nb[i].notifier_call = iommu_bus_notifier; 245 bus_register_notifier(iommu_buses[i], &nb[i]); 246 } 247 248 return 0; 249} 250subsys_initcall(iommu_subsys_init); 251 252static int remove_iommu_group(struct device *dev, void *data) 253{ 254 if (dev->iommu && dev->iommu->iommu_dev == data) 255 iommu_release_device(dev); 256 257 return 0; 258} 259 260/** 261 * iommu_device_register() - Register an IOMMU hardware instance 262 * @iommu: IOMMU handle for the instance 263 * @ops: IOMMU ops to associate with the instance 264 * @hwdev: (optional) actual instance device, used for fwnode lookup 265 * 266 * Return: 0 on success, or an error. 267 */ 268int iommu_device_register(struct iommu_device *iommu, 269 const struct iommu_ops *ops, struct device *hwdev) 270{ 271 int err = 0; 272 273 /* We need to be able to take module references appropriately */ 274 if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) 275 return -EINVAL; 276 277 iommu->ops = ops; 278 if (hwdev) 279 iommu->fwnode = dev_fwnode(hwdev); 280 281 spin_lock(&iommu_device_lock); 282 list_add_tail(&iommu->list, &iommu_device_list); 283 spin_unlock(&iommu_device_lock); 284 285 for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) 286 err = bus_iommu_probe(iommu_buses[i]); 287 if (err) 288 iommu_device_unregister(iommu); 289 else 290 WRITE_ONCE(iommu->ready, true); 291 return err; 292} 293EXPORT_SYMBOL_GPL(iommu_device_register); 294 295void iommu_device_unregister(struct iommu_device *iommu) 296{ 297 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) 298 bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); 299 300 spin_lock(&iommu_device_lock); 301 list_del(&iommu->list); 302 spin_unlock(&iommu_device_lock); 303 304 /* Pairs with the alloc in generic_single_device_group() */ 305 iommu_group_put(iommu->singleton_group); 306 iommu->singleton_group = NULL; 307} 308EXPORT_SYMBOL_GPL(iommu_device_unregister); 309 310#if IS_ENABLED(CONFIG_IOMMUFD_TEST) 311void iommu_device_unregister_bus(struct iommu_device *iommu, 312 const struct bus_type *bus, 313 struct notifier_block *nb) 314{ 315 bus_unregister_notifier(bus, nb); 316 fwnode_remove_software_node(iommu->fwnode); 317 iommu_device_unregister(iommu); 318} 319EXPORT_SYMBOL_GPL(iommu_device_unregister_bus); 320 321/* 322 * Register an iommu driver against a single bus. This is only used by iommufd 323 * selftest to create a mock iommu driver. The caller must provide 324 * some memory to hold a notifier_block. 325 */ 326int iommu_device_register_bus(struct iommu_device *iommu, 327 const struct iommu_ops *ops, 328 const struct bus_type *bus, 329 struct notifier_block *nb) 330{ 331 int err; 332 333 iommu->ops = ops; 334 nb->notifier_call = iommu_bus_notifier; 335 err = bus_register_notifier(bus, nb); 336 if (err) 337 return err; 338 339 iommu->fwnode = fwnode_create_software_node(NULL, NULL); 340 if (IS_ERR(iommu->fwnode)) { 341 bus_unregister_notifier(bus, nb); 342 return PTR_ERR(iommu->fwnode); 343 } 344 345 spin_lock(&iommu_device_lock); 346 list_add_tail(&iommu->list, &iommu_device_list); 347 spin_unlock(&iommu_device_lock); 348 349 err = bus_iommu_probe(bus); 350 if (err) { 351 iommu_device_unregister_bus(iommu, bus, nb); 352 return err; 353 } 354 WRITE_ONCE(iommu->ready, true); 355 return 0; 356} 357EXPORT_SYMBOL_GPL(iommu_device_register_bus); 358 359int iommu_mock_device_add(struct device *dev, struct iommu_device *iommu) 360{ 361 int rc; 362 363 mutex_lock(&iommu_probe_device_lock); 364 rc = iommu_fwspec_init(dev, iommu->fwnode); 365 mutex_unlock(&iommu_probe_device_lock); 366 367 if (rc) 368 return rc; 369 370 rc = device_add(dev); 371 if (rc) 372 iommu_fwspec_free(dev); 373 return rc; 374} 375EXPORT_SYMBOL_GPL(iommu_mock_device_add); 376#endif 377 378static struct dev_iommu *dev_iommu_get(struct device *dev) 379{ 380 struct dev_iommu *param = dev->iommu; 381 382 lockdep_assert_held(&iommu_probe_device_lock); 383 384 if (param) 385 return param; 386 387 param = kzalloc_obj(*param); 388 if (!param) 389 return NULL; 390 391 mutex_init(&param->lock); 392 dev->iommu = param; 393 return param; 394} 395 396void dev_iommu_free(struct device *dev) 397{ 398 struct dev_iommu *param = dev->iommu; 399 400 dev->iommu = NULL; 401 if (param->fwspec) { 402 fwnode_handle_put(param->fwspec->iommu_fwnode); 403 kfree(param->fwspec); 404 } 405 kfree(param); 406} 407 408/* 409 * Internal equivalent of device_iommu_mapped() for when we care that a device 410 * actually has API ops, and don't want false positives from VFIO-only groups. 411 */ 412static bool dev_has_iommu(struct device *dev) 413{ 414 return dev->iommu && dev->iommu->iommu_dev; 415} 416 417static u32 dev_iommu_get_max_pasids(struct device *dev) 418{ 419 u32 max_pasids = 0, bits = 0; 420 int ret; 421 422 if (dev_is_pci(dev)) { 423 ret = pci_max_pasids(to_pci_dev(dev)); 424 if (ret > 0) 425 max_pasids = ret; 426 } else { 427 ret = device_property_read_u32(dev, "pasid-num-bits", &bits); 428 if (!ret) 429 max_pasids = 1UL << bits; 430 } 431 432 return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); 433} 434 435void dev_iommu_priv_set(struct device *dev, void *priv) 436{ 437 /* FSL_PAMU does something weird */ 438 if (!IS_ENABLED(CONFIG_FSL_PAMU)) 439 lockdep_assert_held(&iommu_probe_device_lock); 440 dev->iommu->priv = priv; 441} 442EXPORT_SYMBOL_GPL(dev_iommu_priv_set); 443 444/* 445 * Init the dev->iommu and dev->iommu_group in the struct device and get the 446 * driver probed 447 */ 448static int iommu_init_device(struct device *dev) 449{ 450 const struct iommu_ops *ops; 451 struct iommu_device *iommu_dev; 452 struct iommu_group *group; 453 int ret; 454 455 if (!dev_iommu_get(dev)) 456 return -ENOMEM; 457 /* 458 * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing 459 * is buried in the bus dma_configure path. Properly unpicking that is 460 * still a big job, so for now just invoke the whole thing. The device 461 * already having a driver bound means dma_configure has already run and 462 * found no IOMMU to wait for, so there's no point calling it again. 463 */ 464 if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) { 465 mutex_unlock(&iommu_probe_device_lock); 466 dev->bus->dma_configure(dev); 467 mutex_lock(&iommu_probe_device_lock); 468 /* If another instance finished the job for us, skip it */ 469 if (!dev->iommu || dev->iommu_group) 470 return -ENODEV; 471 } 472 /* 473 * At this point, relevant devices either now have a fwspec which will 474 * match ops registered with a non-NULL fwnode, or we can reasonably 475 * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can 476 * be present, and that any of their registered instances has suitable 477 * ops for probing, and thus cheekily co-opt the same mechanism. 478 */ 479 ops = iommu_fwspec_ops(dev->iommu->fwspec); 480 if (!ops) { 481 ret = -ENODEV; 482 goto err_free; 483 } 484 485 if (!try_module_get(ops->owner)) { 486 ret = -EINVAL; 487 goto err_free; 488 } 489 490 iommu_dev = ops->probe_device(dev); 491 if (IS_ERR(iommu_dev)) { 492 ret = PTR_ERR(iommu_dev); 493 goto err_module_put; 494 } 495 dev->iommu->iommu_dev = iommu_dev; 496 497 ret = iommu_device_link(iommu_dev, dev); 498 if (ret) 499 goto err_release; 500 501 group = ops->device_group(dev); 502 if (WARN_ON_ONCE(group == NULL)) 503 group = ERR_PTR(-EINVAL); 504 if (IS_ERR(group)) { 505 ret = PTR_ERR(group); 506 goto err_unlink; 507 } 508 dev->iommu_group = group; 509 510 dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); 511 if (ops->is_attach_deferred) 512 dev->iommu->attach_deferred = ops->is_attach_deferred(dev); 513 return 0; 514 515err_unlink: 516 iommu_device_unlink(iommu_dev, dev); 517err_release: 518 if (ops->release_device) 519 ops->release_device(dev); 520err_module_put: 521 module_put(ops->owner); 522err_free: 523 dev->iommu->iommu_dev = NULL; 524 dev_iommu_free(dev); 525 return ret; 526} 527 528static void iommu_deinit_device(struct device *dev) 529{ 530 struct iommu_group *group = dev->iommu_group; 531 const struct iommu_ops *ops = dev_iommu_ops(dev); 532 533 lockdep_assert_held(&group->mutex); 534 535 iommu_device_unlink(dev->iommu->iommu_dev, dev); 536 537 /* 538 * release_device() must stop using any attached domain on the device. 539 * If there are still other devices in the group, they are not affected 540 * by this callback. 541 * 542 * If the iommu driver provides release_domain, the core code ensures 543 * that domain is attached prior to calling release_device. Drivers can 544 * use this to enforce a translation on the idle iommu. Typically, the 545 * global static blocked_domain is a good choice. 546 * 547 * Otherwise, the iommu driver must set the device to either an identity 548 * or a blocking translation in release_device() and stop using any 549 * domain pointer, as it is going to be freed. 550 * 551 * Regardless, if a delayed attach never occurred, then the release 552 * should still avoid touching any hardware configuration either. 553 */ 554 if (!dev->iommu->attach_deferred && ops->release_domain) { 555 struct iommu_domain *release_domain = ops->release_domain; 556 557 /* 558 * If the device requires direct mappings then it should not 559 * be parked on a BLOCKED domain during release as that would 560 * break the direct mappings. 561 */ 562 if (dev->iommu->require_direct && ops->identity_domain && 563 release_domain == ops->blocked_domain) 564 release_domain = ops->identity_domain; 565 566 release_domain->ops->attach_dev(release_domain, dev, 567 group->domain); 568 } 569 570 if (ops->release_device) 571 ops->release_device(dev); 572 573 /* 574 * If this is the last driver to use the group then we must free the 575 * domains before we do the module_put(). 576 */ 577 if (list_empty(&group->devices)) { 578 if (group->default_domain) { 579 iommu_domain_free(group->default_domain); 580 group->default_domain = NULL; 581 } 582 if (group->blocking_domain) { 583 iommu_domain_free(group->blocking_domain); 584 group->blocking_domain = NULL; 585 } 586 group->domain = NULL; 587 } 588 589 /* Caller must put iommu_group */ 590 dev->iommu_group = NULL; 591 module_put(ops->owner); 592 dev_iommu_free(dev); 593#ifdef CONFIG_IOMMU_DMA 594 dev->dma_iommu = false; 595#endif 596} 597 598static struct iommu_domain *pasid_array_entry_to_domain(void *entry) 599{ 600 if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN) 601 return xa_untag_pointer(entry); 602 return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain; 603} 604 605DEFINE_MUTEX(iommu_probe_device_lock); 606 607static int __iommu_probe_device(struct device *dev, struct list_head *group_list) 608{ 609 struct iommu_group *group; 610 struct group_device *gdev; 611 int ret; 612 613 /* 614 * Serialise to avoid races between IOMMU drivers registering in 615 * parallel and/or the "replay" calls from ACPI/OF code via client 616 * driver probe. Once the latter have been cleaned up we should 617 * probably be able to use device_lock() here to minimise the scope, 618 * but for now enforcing a simple global ordering is fine. 619 */ 620 lockdep_assert_held(&iommu_probe_device_lock); 621 622 /* Device is probed already if in a group */ 623 if (dev->iommu_group) 624 return 0; 625 626 ret = iommu_init_device(dev); 627 if (ret) 628 return ret; 629 /* 630 * And if we do now see any replay calls, they would indicate someone 631 * misusing the dma_configure path outside bus code. 632 */ 633 if (dev->driver) 634 dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n"); 635 636 group = dev->iommu_group; 637 gdev = iommu_group_alloc_device(group, dev); 638 mutex_lock(&group->mutex); 639 if (IS_ERR(gdev)) { 640 ret = PTR_ERR(gdev); 641 goto err_put_group; 642 } 643 644 /* 645 * The gdev must be in the list before calling 646 * iommu_setup_default_domain() 647 */ 648 list_add_tail(&gdev->list, &group->devices); 649 WARN_ON(group->default_domain && !group->domain); 650 if (group->default_domain) 651 iommu_create_device_direct_mappings(group->default_domain, dev); 652 if (group->domain) { 653 ret = __iommu_device_set_domain(group, dev, group->domain, NULL, 654 0); 655 if (ret) 656 goto err_remove_gdev; 657 } else if (!group->default_domain && !group_list) { 658 ret = iommu_setup_default_domain(group, 0); 659 if (ret) 660 goto err_remove_gdev; 661 } else if (!group->default_domain) { 662 /* 663 * With a group_list argument we defer the default_domain setup 664 * to the caller by providing a de-duplicated list of groups 665 * that need further setup. 666 */ 667 if (list_empty(&group->entry)) 668 list_add_tail(&group->entry, group_list); 669 } 670 671 if (group->default_domain) 672 iommu_setup_dma_ops(dev, group->default_domain); 673 674 mutex_unlock(&group->mutex); 675 676 return 0; 677 678err_remove_gdev: 679 list_del(&gdev->list); 680 __iommu_group_free_device(group, gdev); 681err_put_group: 682 iommu_deinit_device(dev); 683 mutex_unlock(&group->mutex); 684 iommu_group_put(group); 685 686 return ret; 687} 688 689int iommu_probe_device(struct device *dev) 690{ 691 const struct iommu_ops *ops; 692 int ret; 693 694 mutex_lock(&iommu_probe_device_lock); 695 ret = __iommu_probe_device(dev, NULL); 696 mutex_unlock(&iommu_probe_device_lock); 697 if (ret) 698 return ret; 699 700 ops = dev_iommu_ops(dev); 701 if (ops->probe_finalize) 702 ops->probe_finalize(dev); 703 704 return 0; 705} 706 707static void __iommu_group_free_device(struct iommu_group *group, 708 struct group_device *grp_dev) 709{ 710 struct device *dev = grp_dev->dev; 711 712 sysfs_remove_link(group->devices_kobj, grp_dev->name); 713 sysfs_remove_link(&dev->kobj, "iommu_group"); 714 715 trace_remove_device_from_group(group->id, dev); 716 717 /* 718 * If the group has become empty then ownership must have been 719 * released, and the current domain must be set back to NULL or 720 * the default domain. 721 */ 722 if (list_empty(&group->devices)) 723 WARN_ON(group->owner_cnt || 724 group->domain != group->default_domain); 725 726 kfree(grp_dev->name); 727 kfree(grp_dev); 728} 729 730/* Remove the iommu_group from the struct device. */ 731static void __iommu_group_remove_device(struct device *dev) 732{ 733 struct iommu_group *group = dev->iommu_group; 734 struct group_device *device; 735 736 mutex_lock(&group->mutex); 737 for_each_group_device(group, device) { 738 if (device->dev != dev) 739 continue; 740 741 list_del(&device->list); 742 __iommu_group_free_device(group, device); 743 if (dev_has_iommu(dev)) 744 iommu_deinit_device(dev); 745 else 746 dev->iommu_group = NULL; 747 break; 748 } 749 mutex_unlock(&group->mutex); 750 751 /* 752 * Pairs with the get in iommu_init_device() or 753 * iommu_group_add_device() 754 */ 755 iommu_group_put(group); 756} 757 758static void iommu_release_device(struct device *dev) 759{ 760 struct iommu_group *group = dev->iommu_group; 761 762 if (group) 763 __iommu_group_remove_device(dev); 764 765 /* Free any fwspec if no iommu_driver was ever attached */ 766 if (dev->iommu) 767 dev_iommu_free(dev); 768} 769 770static int __init iommu_set_def_domain_type(char *str) 771{ 772 bool pt; 773 int ret; 774 775 ret = kstrtobool(str, &pt); 776 if (ret) 777 return ret; 778 779 if (pt) 780 iommu_set_default_passthrough(true); 781 else 782 iommu_set_default_translated(true); 783 784 return 0; 785} 786early_param("iommu.passthrough", iommu_set_def_domain_type); 787 788static int __init iommu_dma_setup(char *str) 789{ 790 int ret = kstrtobool(str, &iommu_dma_strict); 791 792 if (!ret) 793 iommu_cmd_line |= IOMMU_CMD_LINE_STRICT; 794 return ret; 795} 796early_param("iommu.strict", iommu_dma_setup); 797 798void iommu_set_dma_strict(void) 799{ 800 iommu_dma_strict = true; 801 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) 802 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 803} 804 805static ssize_t iommu_group_attr_show(struct kobject *kobj, 806 struct attribute *__attr, char *buf) 807{ 808 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 809 struct iommu_group *group = to_iommu_group(kobj); 810 ssize_t ret = -EIO; 811 812 if (attr->show) 813 ret = attr->show(group, buf); 814 return ret; 815} 816 817static ssize_t iommu_group_attr_store(struct kobject *kobj, 818 struct attribute *__attr, 819 const char *buf, size_t count) 820{ 821 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 822 struct iommu_group *group = to_iommu_group(kobj); 823 ssize_t ret = -EIO; 824 825 if (attr->store) 826 ret = attr->store(group, buf, count); 827 return ret; 828} 829 830static const struct sysfs_ops iommu_group_sysfs_ops = { 831 .show = iommu_group_attr_show, 832 .store = iommu_group_attr_store, 833}; 834 835static int iommu_group_create_file(struct iommu_group *group, 836 struct iommu_group_attribute *attr) 837{ 838 return sysfs_create_file(&group->kobj, &attr->attr); 839} 840 841static void iommu_group_remove_file(struct iommu_group *group, 842 struct iommu_group_attribute *attr) 843{ 844 sysfs_remove_file(&group->kobj, &attr->attr); 845} 846 847static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 848{ 849 return sysfs_emit(buf, "%s\n", group->name); 850} 851 852/** 853 * iommu_insert_resv_region - Insert a new region in the 854 * list of reserved regions. 855 * @new: new region to insert 856 * @regions: list of regions 857 * 858 * Elements are sorted by start address and overlapping segments 859 * of the same type are merged. 860 */ 861static int iommu_insert_resv_region(struct iommu_resv_region *new, 862 struct list_head *regions) 863{ 864 struct iommu_resv_region *iter, *tmp, *nr, *top; 865 LIST_HEAD(stack); 866 867 nr = iommu_alloc_resv_region(new->start, new->length, 868 new->prot, new->type, GFP_KERNEL); 869 if (!nr) 870 return -ENOMEM; 871 872 /* First add the new element based on start address sorting */ 873 list_for_each_entry(iter, regions, list) { 874 if (nr->start < iter->start || 875 (nr->start == iter->start && nr->type <= iter->type)) 876 break; 877 } 878 list_add_tail(&nr->list, &iter->list); 879 880 /* Merge overlapping segments of type nr->type in @regions, if any */ 881 list_for_each_entry_safe(iter, tmp, regions, list) { 882 phys_addr_t top_end, iter_end = iter->start + iter->length - 1; 883 884 /* no merge needed on elements of different types than @new */ 885 if (iter->type != new->type) { 886 list_move_tail(&iter->list, &stack); 887 continue; 888 } 889 890 /* look for the last stack element of same type as @iter */ 891 list_for_each_entry_reverse(top, &stack, list) 892 if (top->type == iter->type) 893 goto check_overlap; 894 895 list_move_tail(&iter->list, &stack); 896 continue; 897 898check_overlap: 899 top_end = top->start + top->length - 1; 900 901 if (iter->start > top_end + 1) { 902 list_move_tail(&iter->list, &stack); 903 } else { 904 top->length = max(top_end, iter_end) - top->start + 1; 905 list_del(&iter->list); 906 kfree(iter); 907 } 908 } 909 list_splice(&stack, regions); 910 return 0; 911} 912 913static int 914iommu_insert_device_resv_regions(struct list_head *dev_resv_regions, 915 struct list_head *group_resv_regions) 916{ 917 struct iommu_resv_region *entry; 918 int ret = 0; 919 920 list_for_each_entry(entry, dev_resv_regions, list) { 921 ret = iommu_insert_resv_region(entry, group_resv_regions); 922 if (ret) 923 break; 924 } 925 return ret; 926} 927 928int iommu_get_group_resv_regions(struct iommu_group *group, 929 struct list_head *head) 930{ 931 struct group_device *device; 932 int ret = 0; 933 934 mutex_lock(&group->mutex); 935 for_each_group_device(group, device) { 936 struct list_head dev_resv_regions; 937 938 /* 939 * Non-API groups still expose reserved_regions in sysfs, 940 * so filter out calls that get here that way. 941 */ 942 if (!dev_has_iommu(device->dev)) 943 break; 944 945 INIT_LIST_HEAD(&dev_resv_regions); 946 iommu_get_resv_regions(device->dev, &dev_resv_regions); 947 ret = iommu_insert_device_resv_regions(&dev_resv_regions, head); 948 iommu_put_resv_regions(device->dev, &dev_resv_regions); 949 if (ret) 950 break; 951 } 952 mutex_unlock(&group->mutex); 953 return ret; 954} 955EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions); 956 957static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, 958 char *buf) 959{ 960 struct iommu_resv_region *region, *next; 961 struct list_head group_resv_regions; 962 int offset = 0; 963 964 INIT_LIST_HEAD(&group_resv_regions); 965 iommu_get_group_resv_regions(group, &group_resv_regions); 966 967 list_for_each_entry_safe(region, next, &group_resv_regions, list) { 968 offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n", 969 (long long)region->start, 970 (long long)(region->start + 971 region->length - 1), 972 iommu_group_resv_type_string[region->type]); 973 kfree(region); 974 } 975 976 return offset; 977} 978 979static ssize_t iommu_group_show_type(struct iommu_group *group, 980 char *buf) 981{ 982 char *type = "unknown"; 983 984 mutex_lock(&group->mutex); 985 if (group->default_domain) { 986 switch (group->default_domain->type) { 987 case IOMMU_DOMAIN_BLOCKED: 988 type = "blocked"; 989 break; 990 case IOMMU_DOMAIN_IDENTITY: 991 type = "identity"; 992 break; 993 case IOMMU_DOMAIN_UNMANAGED: 994 type = "unmanaged"; 995 break; 996 case IOMMU_DOMAIN_DMA: 997 type = "DMA"; 998 break; 999 case IOMMU_DOMAIN_DMA_FQ: 1000 type = "DMA-FQ"; 1001 break; 1002 } 1003 } 1004 mutex_unlock(&group->mutex); 1005 1006 return sysfs_emit(buf, "%s\n", type); 1007} 1008 1009static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); 1010 1011static IOMMU_GROUP_ATTR(reserved_regions, 0444, 1012 iommu_group_show_resv_regions, NULL); 1013 1014static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type, 1015 iommu_group_store_type); 1016 1017static void iommu_group_release(struct kobject *kobj) 1018{ 1019 struct iommu_group *group = to_iommu_group(kobj); 1020 1021 pr_debug("Releasing group %d\n", group->id); 1022 1023 if (group->iommu_data_release) 1024 group->iommu_data_release(group->iommu_data); 1025 1026 ida_free(&iommu_group_ida, group->id); 1027 1028 /* Domains are free'd by iommu_deinit_device() */ 1029 WARN_ON(group->default_domain); 1030 WARN_ON(group->blocking_domain); 1031 1032 kfree(group->name); 1033 kfree(group); 1034} 1035 1036static const struct kobj_type iommu_group_ktype = { 1037 .sysfs_ops = &iommu_group_sysfs_ops, 1038 .release = iommu_group_release, 1039}; 1040 1041/** 1042 * iommu_group_alloc - Allocate a new group 1043 * 1044 * This function is called by an iommu driver to allocate a new iommu 1045 * group. The iommu group represents the minimum granularity of the iommu. 1046 * Upon successful return, the caller holds a reference to the supplied 1047 * group in order to hold the group until devices are added. Use 1048 * iommu_group_put() to release this extra reference count, allowing the 1049 * group to be automatically reclaimed once it has no devices or external 1050 * references. 1051 */ 1052struct iommu_group *iommu_group_alloc(void) 1053{ 1054 struct iommu_group *group; 1055 int ret; 1056 1057 group = kzalloc_obj(*group); 1058 if (!group) 1059 return ERR_PTR(-ENOMEM); 1060 1061 group->kobj.kset = iommu_group_kset; 1062 mutex_init(&group->mutex); 1063 INIT_LIST_HEAD(&group->devices); 1064 INIT_LIST_HEAD(&group->entry); 1065 xa_init(&group->pasid_array); 1066 1067 ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); 1068 if (ret < 0) { 1069 kfree(group); 1070 return ERR_PTR(ret); 1071 } 1072 group->id = ret; 1073 1074 ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, 1075 NULL, "%d", group->id); 1076 if (ret) { 1077 kobject_put(&group->kobj); 1078 return ERR_PTR(ret); 1079 } 1080 1081 group->devices_kobj = kobject_create_and_add("devices", &group->kobj); 1082 if (!group->devices_kobj) { 1083 kobject_put(&group->kobj); /* triggers .release & free */ 1084 return ERR_PTR(-ENOMEM); 1085 } 1086 1087 /* 1088 * The devices_kobj holds a reference on the group kobject, so 1089 * as long as that exists so will the group. We can therefore 1090 * use the devices_kobj for reference counting. 1091 */ 1092 kobject_put(&group->kobj); 1093 1094 ret = iommu_group_create_file(group, 1095 &iommu_group_attr_reserved_regions); 1096 if (ret) { 1097 kobject_put(group->devices_kobj); 1098 return ERR_PTR(ret); 1099 } 1100 1101 ret = iommu_group_create_file(group, &iommu_group_attr_type); 1102 if (ret) { 1103 kobject_put(group->devices_kobj); 1104 return ERR_PTR(ret); 1105 } 1106 1107 pr_debug("Allocated group %d\n", group->id); 1108 1109 return group; 1110} 1111EXPORT_SYMBOL_GPL(iommu_group_alloc); 1112 1113/** 1114 * iommu_group_get_iommudata - retrieve iommu_data registered for a group 1115 * @group: the group 1116 * 1117 * iommu drivers can store data in the group for use when doing iommu 1118 * operations. This function provides a way to retrieve it. Caller 1119 * should hold a group reference. 1120 */ 1121void *iommu_group_get_iommudata(struct iommu_group *group) 1122{ 1123 return group->iommu_data; 1124} 1125EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); 1126 1127/** 1128 * iommu_group_set_iommudata - set iommu_data for a group 1129 * @group: the group 1130 * @iommu_data: new data 1131 * @release: release function for iommu_data 1132 * 1133 * iommu drivers can store data in the group for use when doing iommu 1134 * operations. This function provides a way to set the data after 1135 * the group has been allocated. Caller should hold a group reference. 1136 */ 1137void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, 1138 void (*release)(void *iommu_data)) 1139{ 1140 group->iommu_data = iommu_data; 1141 group->iommu_data_release = release; 1142} 1143EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); 1144 1145/** 1146 * iommu_group_set_name - set name for a group 1147 * @group: the group 1148 * @name: name 1149 * 1150 * Allow iommu driver to set a name for a group. When set it will 1151 * appear in a name attribute file under the group in sysfs. 1152 */ 1153int iommu_group_set_name(struct iommu_group *group, const char *name) 1154{ 1155 int ret; 1156 1157 if (group->name) { 1158 iommu_group_remove_file(group, &iommu_group_attr_name); 1159 kfree(group->name); 1160 group->name = NULL; 1161 if (!name) 1162 return 0; 1163 } 1164 1165 group->name = kstrdup(name, GFP_KERNEL); 1166 if (!group->name) 1167 return -ENOMEM; 1168 1169 ret = iommu_group_create_file(group, &iommu_group_attr_name); 1170 if (ret) { 1171 kfree(group->name); 1172 group->name = NULL; 1173 return ret; 1174 } 1175 1176 return 0; 1177} 1178EXPORT_SYMBOL_GPL(iommu_group_set_name); 1179 1180static int iommu_create_device_direct_mappings(struct iommu_domain *domain, 1181 struct device *dev) 1182{ 1183 struct iommu_resv_region *entry; 1184 LIST_HEAD(mappings); 1185 unsigned long pg_size; 1186 int ret = 0; 1187 1188 pg_size = domain->pgsize_bitmap ? 1UL << __ffs(domain->pgsize_bitmap) : 0; 1189 1190 if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size)) 1191 return -EINVAL; 1192 1193 iommu_get_resv_regions(dev, &mappings); 1194 1195 /* We need to consider overlapping regions for different devices */ 1196 list_for_each_entry(entry, &mappings, list) { 1197 dma_addr_t start, end, addr; 1198 size_t map_size = 0; 1199 1200 if (entry->type == IOMMU_RESV_DIRECT) 1201 dev->iommu->require_direct = 1; 1202 1203 if ((entry->type != IOMMU_RESV_DIRECT && 1204 entry->type != IOMMU_RESV_DIRECT_RELAXABLE) || 1205 !iommu_is_dma_domain(domain)) 1206 continue; 1207 1208 start = ALIGN(entry->start, pg_size); 1209 end = ALIGN(entry->start + entry->length, pg_size); 1210 1211 for (addr = start; addr <= end; addr += pg_size) { 1212 phys_addr_t phys_addr; 1213 1214 if (addr == end) 1215 goto map_end; 1216 1217 /* 1218 * Return address by iommu_iova_to_phys for 0 is 1219 * ambiguous. Offset to address 1 if addr is 0. 1220 */ 1221 phys_addr = iommu_iova_to_phys(domain, addr ? addr : 1); 1222 if (!phys_addr) { 1223 map_size += pg_size; 1224 continue; 1225 } 1226 1227map_end: 1228 if (map_size) { 1229 ret = iommu_map(domain, addr - map_size, 1230 addr - map_size, map_size, 1231 entry->prot, GFP_KERNEL); 1232 if (ret) 1233 goto out; 1234 map_size = 0; 1235 } 1236 } 1237 1238 } 1239out: 1240 iommu_put_resv_regions(dev, &mappings); 1241 1242 return ret; 1243} 1244 1245/* This is undone by __iommu_group_free_device() */ 1246static struct group_device *iommu_group_alloc_device(struct iommu_group *group, 1247 struct device *dev) 1248{ 1249 int ret, i = 0; 1250 struct group_device *device; 1251 1252 device = kzalloc_obj(*device); 1253 if (!device) 1254 return ERR_PTR(-ENOMEM); 1255 1256 device->dev = dev; 1257 1258 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 1259 if (ret) 1260 goto err_free_device; 1261 1262 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 1263rename: 1264 if (!device->name) { 1265 ret = -ENOMEM; 1266 goto err_remove_link; 1267 } 1268 1269 ret = sysfs_create_link_nowarn(group->devices_kobj, 1270 &dev->kobj, device->name); 1271 if (ret) { 1272 if (ret == -EEXIST && i >= 0) { 1273 /* 1274 * Account for the slim chance of collision 1275 * and append an instance to the name. 1276 */ 1277 kfree(device->name); 1278 device->name = kasprintf(GFP_KERNEL, "%s.%d", 1279 kobject_name(&dev->kobj), i++); 1280 goto rename; 1281 } 1282 goto err_free_name; 1283 } 1284 1285 trace_add_device_to_group(group->id, dev); 1286 1287 dev_info(dev, "Adding to iommu group %d\n", group->id); 1288 1289 return device; 1290 1291err_free_name: 1292 kfree(device->name); 1293err_remove_link: 1294 sysfs_remove_link(&dev->kobj, "iommu_group"); 1295err_free_device: 1296 kfree(device); 1297 dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); 1298 return ERR_PTR(ret); 1299} 1300 1301/** 1302 * iommu_group_add_device - add a device to an iommu group 1303 * @group: the group into which to add the device (reference should be held) 1304 * @dev: the device 1305 * 1306 * This function is called by an iommu driver to add a device into a 1307 * group. Adding a device increments the group reference count. 1308 */ 1309int iommu_group_add_device(struct iommu_group *group, struct device *dev) 1310{ 1311 struct group_device *gdev; 1312 1313 gdev = iommu_group_alloc_device(group, dev); 1314 if (IS_ERR(gdev)) 1315 return PTR_ERR(gdev); 1316 1317 iommu_group_ref_get(group); 1318 dev->iommu_group = group; 1319 1320 mutex_lock(&group->mutex); 1321 list_add_tail(&gdev->list, &group->devices); 1322 mutex_unlock(&group->mutex); 1323 return 0; 1324} 1325EXPORT_SYMBOL_GPL(iommu_group_add_device); 1326 1327/** 1328 * iommu_group_remove_device - remove a device from it's current group 1329 * @dev: device to be removed 1330 * 1331 * This function is called by an iommu driver to remove the device from 1332 * it's current group. This decrements the iommu group reference count. 1333 */ 1334void iommu_group_remove_device(struct device *dev) 1335{ 1336 struct iommu_group *group = dev->iommu_group; 1337 1338 if (!group) 1339 return; 1340 1341 dev_info(dev, "Removing from iommu group %d\n", group->id); 1342 1343 __iommu_group_remove_device(dev); 1344} 1345EXPORT_SYMBOL_GPL(iommu_group_remove_device); 1346 1347#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) 1348/** 1349 * iommu_group_mutex_assert - Check device group mutex lock 1350 * @dev: the device that has group param set 1351 * 1352 * This function is called by an iommu driver to check whether it holds 1353 * group mutex lock for the given device or not. 1354 * 1355 * Note that this function must be called after device group param is set. 1356 */ 1357void iommu_group_mutex_assert(struct device *dev) 1358{ 1359 struct iommu_group *group = dev->iommu_group; 1360 1361 lockdep_assert_held(&group->mutex); 1362} 1363EXPORT_SYMBOL_GPL(iommu_group_mutex_assert); 1364#endif 1365 1366static struct device *iommu_group_first_dev(struct iommu_group *group) 1367{ 1368 lockdep_assert_held(&group->mutex); 1369 return list_first_entry(&group->devices, struct group_device, list)->dev; 1370} 1371 1372/** 1373 * iommu_group_for_each_dev - iterate over each device in the group 1374 * @group: the group 1375 * @data: caller opaque data to be passed to callback function 1376 * @fn: caller supplied callback function 1377 * 1378 * This function is called by group users to iterate over group devices. 1379 * Callers should hold a reference count to the group during callback. 1380 * The group->mutex is held across callbacks, which will block calls to 1381 * iommu_group_add/remove_device. 1382 */ 1383int iommu_group_for_each_dev(struct iommu_group *group, void *data, 1384 int (*fn)(struct device *, void *)) 1385{ 1386 struct group_device *device; 1387 int ret = 0; 1388 1389 mutex_lock(&group->mutex); 1390 for_each_group_device(group, device) { 1391 ret = fn(device->dev, data); 1392 if (ret) 1393 break; 1394 } 1395 mutex_unlock(&group->mutex); 1396 1397 return ret; 1398} 1399EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); 1400 1401/** 1402 * iommu_group_get - Return the group for a device and increment reference 1403 * @dev: get the group that this device belongs to 1404 * 1405 * This function is called by iommu drivers and users to get the group 1406 * for the specified device. If found, the group is returned and the group 1407 * reference in incremented, else NULL. 1408 */ 1409struct iommu_group *iommu_group_get(struct device *dev) 1410{ 1411 struct iommu_group *group = dev->iommu_group; 1412 1413 if (group) 1414 kobject_get(group->devices_kobj); 1415 1416 return group; 1417} 1418EXPORT_SYMBOL_GPL(iommu_group_get); 1419 1420/** 1421 * iommu_group_ref_get - Increment reference on a group 1422 * @group: the group to use, must not be NULL 1423 * 1424 * This function is called by iommu drivers to take additional references on an 1425 * existing group. Returns the given group for convenience. 1426 */ 1427struct iommu_group *iommu_group_ref_get(struct iommu_group *group) 1428{ 1429 kobject_get(group->devices_kobj); 1430 return group; 1431} 1432EXPORT_SYMBOL_GPL(iommu_group_ref_get); 1433 1434/** 1435 * iommu_group_put - Decrement group reference 1436 * @group: the group to use 1437 * 1438 * This function is called by iommu drivers and users to release the 1439 * iommu group. Once the reference count is zero, the group is released. 1440 */ 1441void iommu_group_put(struct iommu_group *group) 1442{ 1443 if (group) 1444 kobject_put(group->devices_kobj); 1445} 1446EXPORT_SYMBOL_GPL(iommu_group_put); 1447 1448/** 1449 * iommu_group_id - Return ID for a group 1450 * @group: the group to ID 1451 * 1452 * Return the unique ID for the group matching the sysfs group number. 1453 */ 1454int iommu_group_id(struct iommu_group *group) 1455{ 1456 return group->id; 1457} 1458EXPORT_SYMBOL_GPL(iommu_group_id); 1459 1460static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1461 unsigned long *devfns); 1462 1463/* 1464 * To consider a PCI device isolated, we require ACS to support Source 1465 * Validation, Request Redirection, Completer Redirection, and Upstream 1466 * Forwarding. This effectively means that devices cannot spoof their 1467 * requester ID, requests and completions cannot be redirected, and all 1468 * transactions are forwarded upstream, even as it passes through a 1469 * bridge where the target device is downstream. 1470 */ 1471#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) 1472 1473/* 1474 * For multifunction devices which are not isolated from each other, find 1475 * all the other non-isolated functions and look for existing groups. For 1476 * each function, we also need to look for aliases to or from other devices 1477 * that may already have a group. 1478 */ 1479static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, 1480 unsigned long *devfns) 1481{ 1482 struct pci_dev *tmp = NULL; 1483 struct iommu_group *group; 1484 1485 if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) 1486 return NULL; 1487 1488 for_each_pci_dev(tmp) { 1489 if (tmp == pdev || tmp->bus != pdev->bus || 1490 PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || 1491 pci_acs_enabled(tmp, REQ_ACS_FLAGS)) 1492 continue; 1493 1494 group = get_pci_alias_group(tmp, devfns); 1495 if (group) { 1496 pci_dev_put(tmp); 1497 return group; 1498 } 1499 } 1500 1501 return NULL; 1502} 1503 1504/* 1505 * Look for aliases to or from the given device for existing groups. DMA 1506 * aliases are only supported on the same bus, therefore the search 1507 * space is quite small (especially since we're really only looking at pcie 1508 * device, and therefore only expect multiple slots on the root complex or 1509 * downstream switch ports). It's conceivable though that a pair of 1510 * multifunction devices could have aliases between them that would cause a 1511 * loop. To prevent this, we use a bitmap to track where we've been. 1512 */ 1513static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1514 unsigned long *devfns) 1515{ 1516 struct pci_dev *tmp = NULL; 1517 struct iommu_group *group; 1518 1519 if (test_and_set_bit(pdev->devfn & 0xff, devfns)) 1520 return NULL; 1521 1522 group = iommu_group_get(&pdev->dev); 1523 if (group) 1524 return group; 1525 1526 for_each_pci_dev(tmp) { 1527 if (tmp == pdev || tmp->bus != pdev->bus) 1528 continue; 1529 1530 /* We alias them or they alias us */ 1531 if (pci_devs_are_dma_aliases(pdev, tmp)) { 1532 group = get_pci_alias_group(tmp, devfns); 1533 if (group) { 1534 pci_dev_put(tmp); 1535 return group; 1536 } 1537 1538 group = get_pci_function_alias_group(tmp, devfns); 1539 if (group) { 1540 pci_dev_put(tmp); 1541 return group; 1542 } 1543 } 1544 } 1545 1546 return NULL; 1547} 1548 1549struct group_for_pci_data { 1550 struct pci_dev *pdev; 1551 struct iommu_group *group; 1552}; 1553 1554/* 1555 * DMA alias iterator callback, return the last seen device. Stop and return 1556 * the IOMMU group if we find one along the way. 1557 */ 1558static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) 1559{ 1560 struct group_for_pci_data *data = opaque; 1561 1562 data->pdev = pdev; 1563 data->group = iommu_group_get(&pdev->dev); 1564 1565 return data->group != NULL; 1566} 1567 1568/* 1569 * Generic device_group call-back function. It just allocates one 1570 * iommu-group per device. 1571 */ 1572struct iommu_group *generic_device_group(struct device *dev) 1573{ 1574 return iommu_group_alloc(); 1575} 1576EXPORT_SYMBOL_GPL(generic_device_group); 1577 1578/* 1579 * Generic device_group call-back function. It just allocates one 1580 * iommu-group per iommu driver instance shared by every device 1581 * probed by that iommu driver. 1582 */ 1583struct iommu_group *generic_single_device_group(struct device *dev) 1584{ 1585 struct iommu_device *iommu = dev->iommu->iommu_dev; 1586 1587 if (!iommu->singleton_group) { 1588 struct iommu_group *group; 1589 1590 group = iommu_group_alloc(); 1591 if (IS_ERR(group)) 1592 return group; 1593 iommu->singleton_group = group; 1594 } 1595 return iommu_group_ref_get(iommu->singleton_group); 1596} 1597EXPORT_SYMBOL_GPL(generic_single_device_group); 1598 1599/* 1600 * Use standard PCI bus topology, isolation features, and DMA alias quirks 1601 * to find or create an IOMMU group for a device. 1602 */ 1603struct iommu_group *pci_device_group(struct device *dev) 1604{ 1605 struct pci_dev *pdev = to_pci_dev(dev); 1606 struct group_for_pci_data data; 1607 struct pci_bus *bus; 1608 struct iommu_group *group = NULL; 1609 u64 devfns[4] = { 0 }; 1610 1611 if (WARN_ON(!dev_is_pci(dev))) 1612 return ERR_PTR(-EINVAL); 1613 1614 /* 1615 * Find the upstream DMA alias for the device. A device must not 1616 * be aliased due to topology in order to have its own IOMMU group. 1617 * If we find an alias along the way that already belongs to a 1618 * group, use it. 1619 */ 1620 if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) 1621 return data.group; 1622 1623 pdev = data.pdev; 1624 1625 /* 1626 * Continue upstream from the point of minimum IOMMU granularity 1627 * due to aliases to the point where devices are protected from 1628 * peer-to-peer DMA by PCI ACS. Again, if we find an existing 1629 * group, use it. 1630 */ 1631 for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { 1632 if (!bus->self) 1633 continue; 1634 1635 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) 1636 break; 1637 1638 pdev = bus->self; 1639 1640 group = iommu_group_get(&pdev->dev); 1641 if (group) 1642 return group; 1643 } 1644 1645 /* 1646 * Look for existing groups on device aliases. If we alias another 1647 * device or another device aliases us, use the same group. 1648 */ 1649 group = get_pci_alias_group(pdev, (unsigned long *)devfns); 1650 if (group) 1651 return group; 1652 1653 /* 1654 * Look for existing groups on non-isolated functions on the same 1655 * slot and aliases of those funcions, if any. No need to clear 1656 * the search bitmap, the tested devfns are still valid. 1657 */ 1658 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 1659 if (group) 1660 return group; 1661 1662 /* No shared group found, allocate new */ 1663 return iommu_group_alloc(); 1664} 1665EXPORT_SYMBOL_GPL(pci_device_group); 1666 1667/* Get the IOMMU group for device on fsl-mc bus */ 1668struct iommu_group *fsl_mc_device_group(struct device *dev) 1669{ 1670 struct device *cont_dev = fsl_mc_cont_dev(dev); 1671 struct iommu_group *group; 1672 1673 group = iommu_group_get(cont_dev); 1674 if (!group) 1675 group = iommu_group_alloc(); 1676 return group; 1677} 1678EXPORT_SYMBOL_GPL(fsl_mc_device_group); 1679 1680static struct iommu_domain *__iommu_alloc_identity_domain(struct device *dev) 1681{ 1682 const struct iommu_ops *ops = dev_iommu_ops(dev); 1683 struct iommu_domain *domain; 1684 1685 if (ops->identity_domain) 1686 return ops->identity_domain; 1687 1688 if (ops->domain_alloc_identity) { 1689 domain = ops->domain_alloc_identity(dev); 1690 if (IS_ERR(domain)) 1691 return domain; 1692 } else { 1693 return ERR_PTR(-EOPNOTSUPP); 1694 } 1695 1696 iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops); 1697 return domain; 1698} 1699 1700static struct iommu_domain * 1701__iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) 1702{ 1703 struct device *dev = iommu_group_first_dev(group); 1704 struct iommu_domain *dom; 1705 1706 if (group->default_domain && group->default_domain->type == req_type) 1707 return group->default_domain; 1708 1709 /* 1710 * When allocating the DMA API domain assume that the driver is going to 1711 * use PASID and make sure the RID's domain is PASID compatible. 1712 */ 1713 if (req_type & __IOMMU_DOMAIN_PAGING) { 1714 dom = __iommu_paging_domain_alloc_flags(dev, req_type, 1715 dev->iommu->max_pasids ? IOMMU_HWPT_ALLOC_PASID : 0); 1716 1717 /* 1718 * If driver does not support PASID feature then 1719 * try to allocate non-PASID domain 1720 */ 1721 if (PTR_ERR(dom) == -EOPNOTSUPP) 1722 dom = __iommu_paging_domain_alloc_flags(dev, req_type, 0); 1723 1724 return dom; 1725 } 1726 1727 if (req_type == IOMMU_DOMAIN_IDENTITY) 1728 return __iommu_alloc_identity_domain(dev); 1729 1730 return ERR_PTR(-EINVAL); 1731} 1732 1733/* 1734 * req_type of 0 means "auto" which means to select a domain based on 1735 * iommu_def_domain_type or what the driver actually supports. 1736 */ 1737static struct iommu_domain * 1738iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) 1739{ 1740 const struct iommu_ops *ops = dev_iommu_ops(iommu_group_first_dev(group)); 1741 struct iommu_domain *dom; 1742 1743 lockdep_assert_held(&group->mutex); 1744 1745 /* 1746 * Allow legacy drivers to specify the domain that will be the default 1747 * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM 1748 * domain. Do not use in new drivers. 1749 */ 1750 if (ops->default_domain) { 1751 if (req_type != ops->default_domain->type) 1752 return ERR_PTR(-EINVAL); 1753 return ops->default_domain; 1754 } 1755 1756 if (req_type) 1757 return __iommu_group_alloc_default_domain(group, req_type); 1758 1759 /* The driver gave no guidance on what type to use, try the default */ 1760 dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type); 1761 if (!IS_ERR(dom)) 1762 return dom; 1763 1764 /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ 1765 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) 1766 return ERR_PTR(-EINVAL); 1767 dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA); 1768 if (IS_ERR(dom)) 1769 return dom; 1770 1771 pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", 1772 iommu_def_domain_type, group->name); 1773 return dom; 1774} 1775 1776struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) 1777{ 1778 return group->default_domain; 1779} 1780 1781static int probe_iommu_group(struct device *dev, void *data) 1782{ 1783 struct list_head *group_list = data; 1784 int ret; 1785 1786 mutex_lock(&iommu_probe_device_lock); 1787 ret = __iommu_probe_device(dev, group_list); 1788 mutex_unlock(&iommu_probe_device_lock); 1789 if (ret == -ENODEV) 1790 ret = 0; 1791 1792 return ret; 1793} 1794 1795static int iommu_bus_notifier(struct notifier_block *nb, 1796 unsigned long action, void *data) 1797{ 1798 struct device *dev = data; 1799 1800 if (action == BUS_NOTIFY_ADD_DEVICE) { 1801 int ret; 1802 1803 ret = iommu_probe_device(dev); 1804 return (ret) ? NOTIFY_DONE : NOTIFY_OK; 1805 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { 1806 iommu_release_device(dev); 1807 return NOTIFY_OK; 1808 } 1809 1810 return 0; 1811} 1812 1813/* 1814 * Combine the driver's chosen def_domain_type across all the devices in a 1815 * group. Drivers must give a consistent result. 1816 */ 1817static int iommu_get_def_domain_type(struct iommu_group *group, 1818 struct device *dev, int cur_type) 1819{ 1820 const struct iommu_ops *ops = dev_iommu_ops(dev); 1821 int type; 1822 1823 if (ops->default_domain) { 1824 /* 1825 * Drivers that declare a global static default_domain will 1826 * always choose that. 1827 */ 1828 type = ops->default_domain->type; 1829 } else { 1830 if (ops->def_domain_type) 1831 type = ops->def_domain_type(dev); 1832 else 1833 return cur_type; 1834 } 1835 if (!type || cur_type == type) 1836 return cur_type; 1837 if (!cur_type) 1838 return type; 1839 1840 dev_err_ratelimited( 1841 dev, 1842 "IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n", 1843 iommu_domain_type_str(cur_type), iommu_domain_type_str(type), 1844 group->id); 1845 1846 /* 1847 * Try to recover, drivers are allowed to force IDENTITY or DMA, IDENTITY 1848 * takes precedence. 1849 */ 1850 if (type == IOMMU_DOMAIN_IDENTITY) 1851 return type; 1852 return cur_type; 1853} 1854 1855/* 1856 * A target_type of 0 will select the best domain type. 0 can be returned in 1857 * this case meaning the global default should be used. 1858 */ 1859static int iommu_get_default_domain_type(struct iommu_group *group, 1860 int target_type) 1861{ 1862 struct device *untrusted = NULL; 1863 struct group_device *gdev; 1864 int driver_type = 0; 1865 1866 lockdep_assert_held(&group->mutex); 1867 1868 /* 1869 * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an 1870 * identity_domain and it will automatically become their default 1871 * domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain. 1872 * Override the selection to IDENTITY. 1873 */ 1874 if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) { 1875 static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) && 1876 IS_ENABLED(CONFIG_IOMMU_DMA))); 1877 driver_type = IOMMU_DOMAIN_IDENTITY; 1878 } 1879 1880 for_each_group_device(group, gdev) { 1881 driver_type = iommu_get_def_domain_type(group, gdev->dev, 1882 driver_type); 1883 1884 if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) { 1885 /* 1886 * No ARM32 using systems will set untrusted, it cannot 1887 * work. 1888 */ 1889 if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))) 1890 return -1; 1891 untrusted = gdev->dev; 1892 } 1893 } 1894 1895 /* 1896 * If the common dma ops are not selected in kconfig then we cannot use 1897 * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been 1898 * selected. 1899 */ 1900 if (!IS_ENABLED(CONFIG_IOMMU_DMA)) { 1901 if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA)) 1902 return -1; 1903 if (!driver_type) 1904 driver_type = IOMMU_DOMAIN_IDENTITY; 1905 } 1906 1907 if (untrusted) { 1908 if (driver_type && driver_type != IOMMU_DOMAIN_DMA) { 1909 dev_err_ratelimited( 1910 untrusted, 1911 "Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n", 1912 group->id, iommu_domain_type_str(driver_type)); 1913 return -1; 1914 } 1915 driver_type = IOMMU_DOMAIN_DMA; 1916 } 1917 1918 if (target_type) { 1919 if (driver_type && target_type != driver_type) 1920 return -1; 1921 return target_type; 1922 } 1923 return driver_type; 1924} 1925 1926static void iommu_group_do_probe_finalize(struct device *dev) 1927{ 1928 const struct iommu_ops *ops = dev_iommu_ops(dev); 1929 1930 if (ops->probe_finalize) 1931 ops->probe_finalize(dev); 1932} 1933 1934static int bus_iommu_probe(const struct bus_type *bus) 1935{ 1936 struct iommu_group *group, *next; 1937 LIST_HEAD(group_list); 1938 int ret; 1939 1940 ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); 1941 if (ret) 1942 return ret; 1943 1944 list_for_each_entry_safe(group, next, &group_list, entry) { 1945 struct group_device *gdev; 1946 1947 mutex_lock(&group->mutex); 1948 1949 /* Remove item from the list */ 1950 list_del_init(&group->entry); 1951 1952 /* 1953 * We go to the trouble of deferred default domain creation so 1954 * that the cross-group default domain type and the setup of the 1955 * IOMMU_RESV_DIRECT will work correctly in non-hotpug scenarios. 1956 */ 1957 ret = iommu_setup_default_domain(group, 0); 1958 if (ret) { 1959 mutex_unlock(&group->mutex); 1960 return ret; 1961 } 1962 for_each_group_device(group, gdev) 1963 iommu_setup_dma_ops(gdev->dev, group->default_domain); 1964 mutex_unlock(&group->mutex); 1965 1966 /* 1967 * FIXME: Mis-locked because the ops->probe_finalize() call-back 1968 * of some IOMMU drivers calls arm_iommu_attach_device() which 1969 * in-turn might call back into IOMMU core code, where it tries 1970 * to take group->mutex, resulting in a deadlock. 1971 */ 1972 for_each_group_device(group, gdev) 1973 iommu_group_do_probe_finalize(gdev->dev); 1974 } 1975 1976 return 0; 1977} 1978 1979/** 1980 * device_iommu_capable() - check for a general IOMMU capability 1981 * @dev: device to which the capability would be relevant, if available 1982 * @cap: IOMMU capability 1983 * 1984 * Return: true if an IOMMU is present and supports the given capability 1985 * for the given device, otherwise false. 1986 */ 1987bool device_iommu_capable(struct device *dev, enum iommu_cap cap) 1988{ 1989 const struct iommu_ops *ops; 1990 1991 if (!dev_has_iommu(dev)) 1992 return false; 1993 1994 ops = dev_iommu_ops(dev); 1995 if (!ops->capable) 1996 return false; 1997 1998 return ops->capable(dev, cap); 1999} 2000EXPORT_SYMBOL_GPL(device_iommu_capable); 2001 2002/** 2003 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() 2004 * for a group 2005 * @group: Group to query 2006 * 2007 * IOMMU groups should not have differing values of 2008 * msi_device_has_isolated_msi() for devices in a group. However nothing 2009 * directly prevents this, so ensure mistakes don't result in isolation failures 2010 * by checking that all the devices are the same. 2011 */ 2012bool iommu_group_has_isolated_msi(struct iommu_group *group) 2013{ 2014 struct group_device *group_dev; 2015 bool ret = true; 2016 2017 mutex_lock(&group->mutex); 2018 for_each_group_device(group, group_dev) 2019 ret &= msi_device_has_isolated_msi(group_dev->dev); 2020 mutex_unlock(&group->mutex); 2021 return ret; 2022} 2023EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); 2024 2025/** 2026 * iommu_set_fault_handler() - set a fault handler for an iommu domain 2027 * @domain: iommu domain 2028 * @handler: fault handler 2029 * @token: user data, will be passed back to the fault handler 2030 * 2031 * This function should be used by IOMMU users which want to be notified 2032 * whenever an IOMMU fault happens. 2033 * 2034 * The fault handler itself should return 0 on success, and an appropriate 2035 * error code otherwise. 2036 */ 2037void iommu_set_fault_handler(struct iommu_domain *domain, 2038 iommu_fault_handler_t handler, 2039 void *token) 2040{ 2041 if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE)) 2042 return; 2043 2044 domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER; 2045 domain->handler = handler; 2046 domain->handler_token = token; 2047} 2048EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 2049 2050static void iommu_domain_init(struct iommu_domain *domain, unsigned int type, 2051 const struct iommu_ops *ops) 2052{ 2053 domain->type = type; 2054 domain->owner = ops; 2055 if (!domain->ops) 2056 domain->ops = ops->default_domain_ops; 2057} 2058 2059static struct iommu_domain * 2060__iommu_paging_domain_alloc_flags(struct device *dev, unsigned int type, 2061 unsigned int flags) 2062{ 2063 const struct iommu_ops *ops; 2064 struct iommu_domain *domain; 2065 2066 if (!dev_has_iommu(dev)) 2067 return ERR_PTR(-ENODEV); 2068 2069 ops = dev_iommu_ops(dev); 2070 2071 if (ops->domain_alloc_paging && !flags) 2072 domain = ops->domain_alloc_paging(dev); 2073 else if (ops->domain_alloc_paging_flags) 2074 domain = ops->domain_alloc_paging_flags(dev, flags, NULL); 2075#if IS_ENABLED(CONFIG_FSL_PAMU) 2076 else if (ops->domain_alloc && !flags) 2077 domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED); 2078#endif 2079 else 2080 return ERR_PTR(-EOPNOTSUPP); 2081 2082 if (IS_ERR(domain)) 2083 return domain; 2084 if (!domain) 2085 return ERR_PTR(-ENOMEM); 2086 2087 iommu_domain_init(domain, type, ops); 2088 return domain; 2089} 2090 2091/** 2092 * iommu_paging_domain_alloc_flags() - Allocate a paging domain 2093 * @dev: device for which the domain is allocated 2094 * @flags: Bitmap of iommufd_hwpt_alloc_flags 2095 * 2096 * Allocate a paging domain which will be managed by a kernel driver. Return 2097 * allocated domain if successful, or an ERR pointer for failure. 2098 */ 2099struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, 2100 unsigned int flags) 2101{ 2102 return __iommu_paging_domain_alloc_flags(dev, 2103 IOMMU_DOMAIN_UNMANAGED, flags); 2104} 2105EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags); 2106 2107void iommu_domain_free(struct iommu_domain *domain) 2108{ 2109 switch (domain->cookie_type) { 2110 case IOMMU_COOKIE_DMA_IOVA: 2111 iommu_put_dma_cookie(domain); 2112 break; 2113 case IOMMU_COOKIE_DMA_MSI: 2114 iommu_put_msi_cookie(domain); 2115 break; 2116 case IOMMU_COOKIE_SVA: 2117 mmdrop(domain->mm); 2118 break; 2119 default: 2120 break; 2121 } 2122 if (domain->ops->free) 2123 domain->ops->free(domain); 2124} 2125EXPORT_SYMBOL_GPL(iommu_domain_free); 2126 2127/* 2128 * Put the group's domain back to the appropriate core-owned domain - either the 2129 * standard kernel-mode DMA configuration or an all-DMA-blocked domain. 2130 */ 2131static void __iommu_group_set_core_domain(struct iommu_group *group) 2132{ 2133 struct iommu_domain *new_domain; 2134 2135 if (group->owner) 2136 new_domain = group->blocking_domain; 2137 else 2138 new_domain = group->default_domain; 2139 2140 __iommu_group_set_domain_nofail(group, new_domain); 2141} 2142 2143static int __iommu_attach_device(struct iommu_domain *domain, 2144 struct device *dev, struct iommu_domain *old) 2145{ 2146 int ret; 2147 2148 if (unlikely(domain->ops->attach_dev == NULL)) 2149 return -ENODEV; 2150 2151 ret = domain->ops->attach_dev(domain, dev, old); 2152 if (ret) 2153 return ret; 2154 dev->iommu->attach_deferred = 0; 2155 trace_attach_device_to_domain(dev); 2156 return 0; 2157} 2158 2159/** 2160 * iommu_attach_device - Attach an IOMMU domain to a device 2161 * @domain: IOMMU domain to attach 2162 * @dev: Device that will be attached 2163 * 2164 * Returns 0 on success and error code on failure 2165 * 2166 * Note that EINVAL can be treated as a soft failure, indicating 2167 * that certain configuration of the domain is incompatible with 2168 * the device. In this case attaching a different domain to the 2169 * device may succeed. 2170 */ 2171int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 2172{ 2173 /* Caller must be a probed driver on dev */ 2174 struct iommu_group *group = dev->iommu_group; 2175 int ret; 2176 2177 if (!group) 2178 return -ENODEV; 2179 2180 /* 2181 * Lock the group to make sure the device-count doesn't 2182 * change while we are attaching 2183 */ 2184 mutex_lock(&group->mutex); 2185 ret = -EINVAL; 2186 if (list_count_nodes(&group->devices) != 1) 2187 goto out_unlock; 2188 2189 ret = __iommu_attach_group(domain, group); 2190 2191out_unlock: 2192 mutex_unlock(&group->mutex); 2193 return ret; 2194} 2195EXPORT_SYMBOL_GPL(iommu_attach_device); 2196 2197int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 2198{ 2199 /* 2200 * This is called on the dma mapping fast path so avoid locking. This is 2201 * racy, but we have an expectation that the driver will setup its DMAs 2202 * inside probe while being single threaded to avoid racing. 2203 */ 2204 if (!dev->iommu || !dev->iommu->attach_deferred) 2205 return 0; 2206 2207 guard(mutex)(&dev->iommu_group->mutex); 2208 2209 /* 2210 * This is a concurrent attach during a device reset. Reject it until 2211 * pci_dev_reset_iommu_done() attaches the device to group->domain. 2212 * 2213 * Note that this might fail the iommu_dma_map(). But there's nothing 2214 * more we can do here. 2215 */ 2216 if (dev->iommu_group->resetting_domain) 2217 return -EBUSY; 2218 return __iommu_attach_device(domain, dev, NULL); 2219} 2220 2221void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 2222{ 2223 /* Caller must be a probed driver on dev */ 2224 struct iommu_group *group = dev->iommu_group; 2225 2226 if (!group) 2227 return; 2228 2229 mutex_lock(&group->mutex); 2230 if (WARN_ON(domain != group->domain) || 2231 WARN_ON(list_count_nodes(&group->devices) != 1)) 2232 goto out_unlock; 2233 __iommu_group_set_core_domain(group); 2234 2235out_unlock: 2236 mutex_unlock(&group->mutex); 2237} 2238EXPORT_SYMBOL_GPL(iommu_detach_device); 2239 2240/** 2241 * iommu_get_domain_for_dev() - Return the DMA API domain pointer 2242 * @dev: Device to query 2243 * 2244 * This function can be called within a driver bound to dev. The returned 2245 * pointer is valid for the lifetime of the bound driver. 2246 * 2247 * It should not be called by drivers with driver_managed_dma = true. 2248 */ 2249struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2250{ 2251 /* Caller must be a probed driver on dev */ 2252 struct iommu_group *group = dev->iommu_group; 2253 2254 if (!group) 2255 return NULL; 2256 2257 lockdep_assert_not_held(&group->mutex); 2258 2259 return group->domain; 2260} 2261EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2262 2263/** 2264 * iommu_driver_get_domain_for_dev() - Return the driver-level domain pointer 2265 * @dev: Device to query 2266 * 2267 * This function can be called by an iommu driver that wants to get the physical 2268 * domain within an iommu callback function where group->mutex is held. 2269 */ 2270struct iommu_domain *iommu_driver_get_domain_for_dev(struct device *dev) 2271{ 2272 struct iommu_group *group = dev->iommu_group; 2273 2274 lockdep_assert_held(&group->mutex); 2275 2276 /* 2277 * Driver handles the low-level __iommu_attach_device(), including the 2278 * one invoked by pci_dev_reset_iommu_done() re-attaching the device to 2279 * the cached group->domain. In this case, the driver must get the old 2280 * domain from group->resetting_domain rather than group->domain. This 2281 * prevents it from re-attaching the device from group->domain (old) to 2282 * group->domain (new). 2283 */ 2284 if (group->resetting_domain) 2285 return group->resetting_domain; 2286 2287 return group->domain; 2288} 2289EXPORT_SYMBOL_GPL(iommu_driver_get_domain_for_dev); 2290 2291/* 2292 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2293 * guarantees that the group and its default domain are valid and correct. 2294 */ 2295struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2296{ 2297 return dev->iommu_group->default_domain; 2298} 2299 2300static void *iommu_make_pasid_array_entry(struct iommu_domain *domain, 2301 struct iommu_attach_handle *handle) 2302{ 2303 if (handle) { 2304 handle->domain = domain; 2305 return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE); 2306 } 2307 2308 return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN); 2309} 2310 2311static bool domain_iommu_ops_compatible(const struct iommu_ops *ops, 2312 struct iommu_domain *domain) 2313{ 2314 if (domain->owner == ops) 2315 return true; 2316 2317 /* For static domains, owner isn't set. */ 2318 if (domain == ops->blocked_domain || domain == ops->identity_domain) 2319 return true; 2320 2321 return false; 2322} 2323 2324static int __iommu_attach_group(struct iommu_domain *domain, 2325 struct iommu_group *group) 2326{ 2327 struct device *dev; 2328 2329 if (group->domain && group->domain != group->default_domain && 2330 group->domain != group->blocking_domain) 2331 return -EBUSY; 2332 2333 dev = iommu_group_first_dev(group); 2334 if (!dev_has_iommu(dev) || 2335 !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain)) 2336 return -EINVAL; 2337 2338 return __iommu_group_set_domain(group, domain); 2339} 2340 2341/** 2342 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2343 * @domain: IOMMU domain to attach 2344 * @group: IOMMU group that will be attached 2345 * 2346 * Returns 0 on success and error code on failure 2347 * 2348 * Note that EINVAL can be treated as a soft failure, indicating 2349 * that certain configuration of the domain is incompatible with 2350 * the group. In this case attaching a different domain to the 2351 * group may succeed. 2352 */ 2353int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2354{ 2355 int ret; 2356 2357 mutex_lock(&group->mutex); 2358 ret = __iommu_attach_group(domain, group); 2359 mutex_unlock(&group->mutex); 2360 2361 return ret; 2362} 2363EXPORT_SYMBOL_GPL(iommu_attach_group); 2364 2365static int __iommu_device_set_domain(struct iommu_group *group, 2366 struct device *dev, 2367 struct iommu_domain *new_domain, 2368 struct iommu_domain *old_domain, 2369 unsigned int flags) 2370{ 2371 int ret; 2372 2373 /* 2374 * If the device requires IOMMU_RESV_DIRECT then we cannot allow 2375 * the blocking domain to be attached as it does not contain the 2376 * required 1:1 mapping. This test effectively excludes the device 2377 * being used with iommu_group_claim_dma_owner() which will block 2378 * vfio and iommufd as well. 2379 */ 2380 if (dev->iommu->require_direct && 2381 (new_domain->type == IOMMU_DOMAIN_BLOCKED || 2382 new_domain == group->blocking_domain)) { 2383 dev_warn(dev, 2384 "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n"); 2385 return -EINVAL; 2386 } 2387 2388 if (dev->iommu->attach_deferred) { 2389 if (new_domain == group->default_domain) 2390 return 0; 2391 dev->iommu->attach_deferred = 0; 2392 } 2393 2394 ret = __iommu_attach_device(new_domain, dev, old_domain); 2395 if (ret) { 2396 /* 2397 * If we have a blocking domain then try to attach that in hopes 2398 * of avoiding a UAF. Modern drivers should implement blocking 2399 * domains as global statics that cannot fail. 2400 */ 2401 if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) && 2402 group->blocking_domain && 2403 group->blocking_domain != new_domain) 2404 __iommu_attach_device(group->blocking_domain, dev, 2405 old_domain); 2406 return ret; 2407 } 2408 return 0; 2409} 2410 2411/* 2412 * If 0 is returned the group's domain is new_domain. If an error is returned 2413 * then the group's domain will be set back to the existing domain unless 2414 * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's 2415 * domains is left inconsistent. This is a driver bug to fail attach with a 2416 * previously good domain. We try to avoid a kernel UAF because of this. 2417 * 2418 * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU 2419 * API works on domains and devices. Bridge that gap by iterating over the 2420 * devices in a group. Ideally we'd have a single device which represents the 2421 * requestor ID of the group, but we also allow IOMMU drivers to create policy 2422 * defined minimum sets, where the physical hardware may be able to distiguish 2423 * members, but we wish to group them at a higher level (ex. untrusted 2424 * multi-function PCI devices). Thus we attach each device. 2425 */ 2426static int __iommu_group_set_domain_internal(struct iommu_group *group, 2427 struct iommu_domain *new_domain, 2428 unsigned int flags) 2429{ 2430 struct group_device *last_gdev; 2431 struct group_device *gdev; 2432 int result; 2433 int ret; 2434 2435 lockdep_assert_held(&group->mutex); 2436 2437 if (group->domain == new_domain) 2438 return 0; 2439 2440 if (WARN_ON(!new_domain)) 2441 return -EINVAL; 2442 2443 /* 2444 * This is a concurrent attach during a device reset. Reject it until 2445 * pci_dev_reset_iommu_done() attaches the device to group->domain. 2446 */ 2447 if (group->resetting_domain) 2448 return -EBUSY; 2449 2450 /* 2451 * Changing the domain is done by calling attach_dev() on the new 2452 * domain. This switch does not have to be atomic and DMA can be 2453 * discarded during the transition. DMA must only be able to access 2454 * either new_domain or group->domain, never something else. 2455 */ 2456 result = 0; 2457 for_each_group_device(group, gdev) { 2458 ret = __iommu_device_set_domain(group, gdev->dev, new_domain, 2459 group->domain, flags); 2460 if (ret) { 2461 result = ret; 2462 /* 2463 * Keep trying the other devices in the group. If a 2464 * driver fails attach to an otherwise good domain, and 2465 * does not support blocking domains, it should at least 2466 * drop its reference on the current domain so we don't 2467 * UAF. 2468 */ 2469 if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) 2470 continue; 2471 goto err_revert; 2472 } 2473 } 2474 group->domain = new_domain; 2475 return result; 2476 2477err_revert: 2478 /* 2479 * This is called in error unwind paths. A well behaved driver should 2480 * always allow us to attach to a domain that was already attached. 2481 */ 2482 last_gdev = gdev; 2483 for_each_group_device(group, gdev) { 2484 /* No need to revert the last gdev that failed to set domain */ 2485 if (gdev == last_gdev) 2486 break; 2487 /* 2488 * A NULL domain can happen only for first probe, in which case 2489 * we leave group->domain as NULL and let release clean 2490 * everything up. 2491 */ 2492 if (group->domain) 2493 WARN_ON(__iommu_device_set_domain( 2494 group, gdev->dev, group->domain, new_domain, 2495 IOMMU_SET_DOMAIN_MUST_SUCCEED)); 2496 } 2497 return ret; 2498} 2499 2500void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2501{ 2502 mutex_lock(&group->mutex); 2503 __iommu_group_set_core_domain(group); 2504 mutex_unlock(&group->mutex); 2505} 2506EXPORT_SYMBOL_GPL(iommu_detach_group); 2507 2508phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2509{ 2510 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2511 return iova; 2512 2513 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2514 return 0; 2515 2516 return domain->ops->iova_to_phys(domain, iova); 2517} 2518EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2519 2520static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2521 phys_addr_t paddr, size_t size, size_t *count) 2522{ 2523 unsigned int pgsize_idx, pgsize_idx_next; 2524 unsigned long pgsizes; 2525 size_t offset, pgsize, pgsize_next; 2526 size_t offset_end; 2527 unsigned long addr_merge = paddr | iova; 2528 2529 /* Page sizes supported by the hardware and small enough for @size */ 2530 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2531 2532 /* Constrain the page sizes further based on the maximum alignment */ 2533 if (likely(addr_merge)) 2534 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2535 2536 /* Make sure we have at least one suitable page size */ 2537 BUG_ON(!pgsizes); 2538 2539 /* Pick the biggest page size remaining */ 2540 pgsize_idx = __fls(pgsizes); 2541 pgsize = BIT(pgsize_idx); 2542 if (!count) 2543 return pgsize; 2544 2545 /* Find the next biggest support page size, if it exists */ 2546 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2547 if (!pgsizes) 2548 goto out_set_count; 2549 2550 pgsize_idx_next = __ffs(pgsizes); 2551 pgsize_next = BIT(pgsize_idx_next); 2552 2553 /* 2554 * There's no point trying a bigger page size unless the virtual 2555 * and physical addresses are similarly offset within the larger page. 2556 */ 2557 if ((iova ^ paddr) & (pgsize_next - 1)) 2558 goto out_set_count; 2559 2560 /* Calculate the offset to the next page size alignment boundary */ 2561 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2562 2563 /* 2564 * If size is big enough to accommodate the larger page, reduce 2565 * the number of smaller pages. 2566 */ 2567 if (!check_add_overflow(offset, pgsize_next, &offset_end) && 2568 offset_end <= size) 2569 size = offset; 2570 2571out_set_count: 2572 *count = size >> pgsize_idx; 2573 return pgsize; 2574} 2575 2576static int __iommu_map_domain_pgtbl(struct iommu_domain *domain, 2577 unsigned long iova, phys_addr_t paddr, 2578 size_t size, int prot, gfp_t gfp) 2579{ 2580 const struct iommu_domain_ops *ops = domain->ops; 2581 unsigned long orig_iova = iova; 2582 unsigned int min_pagesz; 2583 size_t orig_size = size; 2584 int ret = 0; 2585 2586 might_sleep_if(gfpflags_allow_blocking(gfp)); 2587 2588 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2589 return -EINVAL; 2590 2591 if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL)) 2592 return -ENODEV; 2593 2594 /* Discourage passing strange GFP flags */ 2595 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2596 __GFP_HIGHMEM))) 2597 return -EINVAL; 2598 2599 /* find out the minimum page size supported */ 2600 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2601 2602 /* 2603 * both the virtual address and the physical one, as well as 2604 * the size of the mapping, must be aligned (at least) to the 2605 * size of the smallest page supported by the hardware 2606 */ 2607 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2608 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2609 iova, &paddr, size, min_pagesz); 2610 return -EINVAL; 2611 } 2612 2613 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 2614 2615 while (size) { 2616 size_t pgsize, count, mapped = 0; 2617 2618 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2619 2620 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2621 iova, &paddr, pgsize, count); 2622 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2623 gfp, &mapped); 2624 /* 2625 * Some pages may have been mapped, even if an error occurred, 2626 * so we should account for those so they can be unmapped. 2627 */ 2628 size -= mapped; 2629 2630 if (ret) 2631 break; 2632 2633 iova += mapped; 2634 paddr += mapped; 2635 } 2636 2637 /* unroll mapping in case something went wrong */ 2638 if (ret) { 2639 iommu_unmap(domain, orig_iova, orig_size - size); 2640 return ret; 2641 } 2642 return 0; 2643} 2644 2645int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) 2646{ 2647 const struct iommu_domain_ops *ops = domain->ops; 2648 2649 if (!ops->iotlb_sync_map) 2650 return 0; 2651 return ops->iotlb_sync_map(domain, iova, size); 2652} 2653 2654int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, 2655 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2656{ 2657 struct pt_iommu *pt = iommupt_from_domain(domain); 2658 int ret; 2659 2660 if (pt) { 2661 size_t mapped = 0; 2662 2663 ret = pt->ops->map_range(pt, iova, paddr, size, prot, gfp, 2664 &mapped); 2665 if (ret) { 2666 iommu_unmap(domain, iova, mapped); 2667 return ret; 2668 } 2669 return 0; 2670 } 2671 ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot, gfp); 2672 if (!ret) 2673 return ret; 2674 2675 trace_map(iova, paddr, size); 2676 iommu_debug_map(domain, paddr, size); 2677 return 0; 2678} 2679 2680int iommu_map(struct iommu_domain *domain, unsigned long iova, 2681 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2682{ 2683 int ret; 2684 2685 ret = iommu_map_nosync(domain, iova, paddr, size, prot, gfp); 2686 if (ret) 2687 return ret; 2688 2689 ret = iommu_sync_map(domain, iova, size); 2690 if (ret) 2691 iommu_unmap(domain, iova, size); 2692 2693 return ret; 2694} 2695EXPORT_SYMBOL_GPL(iommu_map); 2696 2697static size_t 2698__iommu_unmap_domain_pgtbl(struct iommu_domain *domain, unsigned long iova, 2699 size_t size, struct iommu_iotlb_gather *iotlb_gather) 2700{ 2701 const struct iommu_domain_ops *ops = domain->ops; 2702 size_t unmapped_page, unmapped = 0; 2703 unsigned int min_pagesz; 2704 2705 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2706 return 0; 2707 2708 if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL)) 2709 return 0; 2710 2711 /* find out the minimum page size supported */ 2712 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2713 2714 /* 2715 * The virtual address, as well as the size of the mapping, must be 2716 * aligned (at least) to the size of the smallest page supported 2717 * by the hardware 2718 */ 2719 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2720 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2721 iova, size, min_pagesz); 2722 return 0; 2723 } 2724 2725 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2726 2727 iommu_debug_unmap_begin(domain, iova, size); 2728 2729 /* 2730 * Keep iterating until we either unmap 'size' bytes (or more) 2731 * or we hit an area that isn't mapped. 2732 */ 2733 while (unmapped < size) { 2734 size_t pgsize, count; 2735 2736 pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count); 2737 unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather); 2738 if (!unmapped_page) 2739 break; 2740 2741 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2742 iova, unmapped_page); 2743 /* 2744 * If the driver itself isn't using the gather, make sure 2745 * it looks non-empty so iotlb_sync will still be called. 2746 */ 2747 if (iotlb_gather->start >= iotlb_gather->end) 2748 iommu_iotlb_gather_add_range(iotlb_gather, iova, size); 2749 2750 iova += unmapped_page; 2751 unmapped += unmapped_page; 2752 } 2753 2754 return unmapped; 2755} 2756 2757static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, 2758 size_t size, 2759 struct iommu_iotlb_gather *iotlb_gather) 2760{ 2761 struct pt_iommu *pt = iommupt_from_domain(domain); 2762 size_t unmapped; 2763 2764 if (pt) 2765 unmapped = pt->ops->unmap_range(pt, iova, size, iotlb_gather); 2766 else 2767 unmapped = __iommu_unmap_domain_pgtbl(domain, iova, size, 2768 iotlb_gather); 2769 trace_unmap(iova, size, unmapped); 2770 iommu_debug_unmap_end(domain, iova, size, unmapped); 2771 return unmapped; 2772} 2773 2774/** 2775 * iommu_unmap() - Remove mappings from a range of IOVA 2776 * @domain: Domain to manipulate 2777 * @iova: IO virtual address to start 2778 * @size: Length of the range starting from @iova 2779 * 2780 * iommu_unmap() will remove a translation created by iommu_map(). It cannot 2781 * subdivide a mapping created by iommu_map(), so it should be called with IOVA 2782 * ranges that match what was passed to iommu_map(). The range can aggregate 2783 * contiguous iommu_map() calls so long as no individual range is split. 2784 * 2785 * Returns: Number of bytes of IOVA unmapped. iova + res will be the point 2786 * unmapping stopped. 2787 */ 2788size_t iommu_unmap(struct iommu_domain *domain, 2789 unsigned long iova, size_t size) 2790{ 2791 struct iommu_iotlb_gather iotlb_gather; 2792 size_t ret; 2793 2794 iommu_iotlb_gather_init(&iotlb_gather); 2795 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2796 iommu_iotlb_sync(domain, &iotlb_gather); 2797 2798 return ret; 2799} 2800EXPORT_SYMBOL_GPL(iommu_unmap); 2801 2802/** 2803 * iommu_unmap_fast() - Remove mappings from a range of IOVA without IOTLB sync 2804 * @domain: Domain to manipulate 2805 * @iova: IO virtual address to start 2806 * @size: Length of the range starting from @iova 2807 * @iotlb_gather: range information for a pending IOTLB flush 2808 * 2809 * iommu_unmap_fast() will remove a translation created by iommu_map(). 2810 * It can't subdivide a mapping created by iommu_map(), so it should be 2811 * called with IOVA ranges that match what was passed to iommu_map(). The 2812 * range can aggregate contiguous iommu_map() calls so long as no individual 2813 * range is split. 2814 * 2815 * Basically iommu_unmap_fast() is the same as iommu_unmap() but for callers 2816 * which manage the IOTLB flushing externally to perform a batched sync. 2817 * 2818 * Returns: Number of bytes of IOVA unmapped. iova + res will be the point 2819 * unmapping stopped. 2820 */ 2821size_t iommu_unmap_fast(struct iommu_domain *domain, 2822 unsigned long iova, size_t size, 2823 struct iommu_iotlb_gather *iotlb_gather) 2824{ 2825 return __iommu_unmap(domain, iova, size, iotlb_gather); 2826} 2827EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2828 2829ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2830 struct scatterlist *sg, unsigned int nents, int prot, 2831 gfp_t gfp) 2832{ 2833 size_t len = 0, mapped = 0; 2834 phys_addr_t start; 2835 unsigned int i = 0; 2836 int ret; 2837 2838 while (i <= nents) { 2839 phys_addr_t s_phys = sg_phys(sg); 2840 2841 if (len && s_phys != start + len) { 2842 ret = iommu_map_nosync(domain, iova + mapped, start, 2843 len, prot, gfp); 2844 if (ret) 2845 goto out_err; 2846 2847 mapped += len; 2848 len = 0; 2849 } 2850 2851 if (sg_dma_is_bus_address(sg)) 2852 goto next; 2853 2854 if (len) { 2855 len += sg->length; 2856 } else { 2857 len = sg->length; 2858 start = s_phys; 2859 } 2860 2861next: 2862 if (++i < nents) 2863 sg = sg_next(sg); 2864 } 2865 2866 ret = iommu_sync_map(domain, iova, mapped); 2867 if (ret) 2868 goto out_err; 2869 2870 return mapped; 2871 2872out_err: 2873 /* undo mappings already done */ 2874 iommu_unmap(domain, iova, mapped); 2875 2876 return ret; 2877} 2878EXPORT_SYMBOL_GPL(iommu_map_sg); 2879 2880/** 2881 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework 2882 * @domain: the iommu domain where the fault has happened 2883 * @dev: the device where the fault has happened 2884 * @iova: the faulting address 2885 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) 2886 * 2887 * This function should be called by the low-level IOMMU implementations 2888 * whenever IOMMU faults happen, to allow high-level users, that are 2889 * interested in such events, to know about them. 2890 * 2891 * This event may be useful for several possible use cases: 2892 * - mere logging of the event 2893 * - dynamic TLB/PTE loading 2894 * - if restarting of the faulting device is required 2895 * 2896 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2897 * PTE/TLB loading will one day be supported, implementations will be able 2898 * to tell whether it succeeded or not according to this return value). 2899 * 2900 * Specifically, -ENOSYS is returned if a fault handler isn't installed 2901 * (though fault handlers can also return -ENOSYS, in case they want to 2902 * elicit the default behavior of the IOMMU drivers). 2903 */ 2904int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 2905 unsigned long iova, int flags) 2906{ 2907 int ret = -ENOSYS; 2908 2909 /* 2910 * if upper layers showed interest and installed a fault handler, 2911 * invoke it. 2912 */ 2913 if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER && 2914 domain->handler) 2915 ret = domain->handler(domain, dev, iova, flags, 2916 domain->handler_token); 2917 2918 trace_io_page_fault(dev, iova, flags); 2919 return ret; 2920} 2921EXPORT_SYMBOL_GPL(report_iommu_fault); 2922 2923static int __init iommu_init(void) 2924{ 2925 iommu_group_kset = kset_create_and_add("iommu_groups", 2926 NULL, kernel_kobj); 2927 BUG_ON(!iommu_group_kset); 2928 2929 iommu_debugfs_setup(); 2930 2931 return 0; 2932} 2933core_initcall(iommu_init); 2934 2935int iommu_set_pgtable_quirks(struct iommu_domain *domain, 2936 unsigned long quirk) 2937{ 2938 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2939 return -EINVAL; 2940 if (!domain->ops->set_pgtable_quirks) 2941 return -EINVAL; 2942 return domain->ops->set_pgtable_quirks(domain, quirk); 2943} 2944EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2945 2946/** 2947 * iommu_get_resv_regions - get reserved regions 2948 * @dev: device for which to get reserved regions 2949 * @list: reserved region list for device 2950 * 2951 * This returns a list of reserved IOVA regions specific to this device. 2952 * A domain user should not map IOVA in these ranges. 2953 */ 2954void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2955{ 2956 const struct iommu_ops *ops = dev_iommu_ops(dev); 2957 2958 if (ops->get_resv_regions) 2959 ops->get_resv_regions(dev, list); 2960} 2961EXPORT_SYMBOL_GPL(iommu_get_resv_regions); 2962 2963/** 2964 * iommu_put_resv_regions - release reserved regions 2965 * @dev: device for which to free reserved regions 2966 * @list: reserved region list for device 2967 * 2968 * This releases a reserved region list acquired by iommu_get_resv_regions(). 2969 */ 2970void iommu_put_resv_regions(struct device *dev, struct list_head *list) 2971{ 2972 struct iommu_resv_region *entry, *next; 2973 2974 list_for_each_entry_safe(entry, next, list, list) { 2975 if (entry->free) 2976 entry->free(dev, entry); 2977 else 2978 kfree(entry); 2979 } 2980} 2981EXPORT_SYMBOL(iommu_put_resv_regions); 2982 2983struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 2984 size_t length, int prot, 2985 enum iommu_resv_type type, 2986 gfp_t gfp) 2987{ 2988 struct iommu_resv_region *region; 2989 2990 region = kzalloc_obj(*region, gfp); 2991 if (!region) 2992 return NULL; 2993 2994 INIT_LIST_HEAD(&region->list); 2995 region->start = start; 2996 region->length = length; 2997 region->prot = prot; 2998 region->type = type; 2999 return region; 3000} 3001EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 3002 3003void iommu_set_default_passthrough(bool cmd_line) 3004{ 3005 if (cmd_line) 3006 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 3007 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 3008} 3009 3010void iommu_set_default_translated(bool cmd_line) 3011{ 3012 if (cmd_line) 3013 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 3014 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 3015} 3016 3017bool iommu_default_passthrough(void) 3018{ 3019 return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; 3020} 3021EXPORT_SYMBOL_GPL(iommu_default_passthrough); 3022 3023static const struct iommu_device *iommu_from_fwnode(const struct fwnode_handle *fwnode) 3024{ 3025 const struct iommu_device *iommu, *ret = NULL; 3026 3027 spin_lock(&iommu_device_lock); 3028 list_for_each_entry(iommu, &iommu_device_list, list) 3029 if (iommu->fwnode == fwnode) { 3030 ret = iommu; 3031 break; 3032 } 3033 spin_unlock(&iommu_device_lock); 3034 return ret; 3035} 3036 3037const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) 3038{ 3039 const struct iommu_device *iommu = iommu_from_fwnode(fwnode); 3040 3041 return iommu ? iommu->ops : NULL; 3042} 3043 3044int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) 3045{ 3046 const struct iommu_device *iommu = iommu_from_fwnode(iommu_fwnode); 3047 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3048 3049 if (!iommu) 3050 return driver_deferred_probe_check_state(dev); 3051 if (!dev->iommu && !READ_ONCE(iommu->ready)) 3052 return -EPROBE_DEFER; 3053 3054 if (fwspec) 3055 return iommu->ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL; 3056 3057 if (!dev_iommu_get(dev)) 3058 return -ENOMEM; 3059 3060 /* Preallocate for the overwhelmingly common case of 1 ID */ 3061 fwspec = kzalloc_flex(*fwspec, ids, 1); 3062 if (!fwspec) 3063 return -ENOMEM; 3064 3065 fwnode_handle_get(iommu_fwnode); 3066 fwspec->iommu_fwnode = iommu_fwnode; 3067 dev_iommu_fwspec_set(dev, fwspec); 3068 return 0; 3069} 3070EXPORT_SYMBOL_GPL(iommu_fwspec_init); 3071 3072void iommu_fwspec_free(struct device *dev) 3073{ 3074 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3075 3076 if (fwspec) { 3077 fwnode_handle_put(fwspec->iommu_fwnode); 3078 kfree(fwspec); 3079 dev_iommu_fwspec_set(dev, NULL); 3080 } 3081} 3082 3083int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) 3084{ 3085 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3086 int i, new_num; 3087 3088 if (!fwspec) 3089 return -EINVAL; 3090 3091 new_num = fwspec->num_ids + num_ids; 3092 if (new_num > 1) { 3093 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 3094 GFP_KERNEL); 3095 if (!fwspec) 3096 return -ENOMEM; 3097 3098 dev_iommu_fwspec_set(dev, fwspec); 3099 } 3100 3101 for (i = 0; i < num_ids; i++) 3102 fwspec->ids[fwspec->num_ids + i] = ids[i]; 3103 3104 fwspec->num_ids = new_num; 3105 return 0; 3106} 3107EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 3108 3109/** 3110 * iommu_setup_default_domain - Set the default_domain for the group 3111 * @group: Group to change 3112 * @target_type: Domain type to set as the default_domain 3113 * 3114 * Allocate a default domain and set it as the current domain on the group. If 3115 * the group already has a default domain it will be changed to the target_type. 3116 * When target_type is 0 the default domain is selected based on driver and 3117 * system preferences. 3118 */ 3119static int iommu_setup_default_domain(struct iommu_group *group, 3120 int target_type) 3121{ 3122 struct iommu_domain *old_dom = group->default_domain; 3123 struct group_device *gdev; 3124 struct iommu_domain *dom; 3125 bool direct_failed; 3126 int req_type; 3127 int ret; 3128 3129 lockdep_assert_held(&group->mutex); 3130 3131 req_type = iommu_get_default_domain_type(group, target_type); 3132 if (req_type < 0) 3133 return -EINVAL; 3134 3135 dom = iommu_group_alloc_default_domain(group, req_type); 3136 if (IS_ERR(dom)) 3137 return PTR_ERR(dom); 3138 3139 if (group->default_domain == dom) 3140 return 0; 3141 3142 if (iommu_is_dma_domain(dom)) { 3143 ret = iommu_get_dma_cookie(dom); 3144 if (ret) { 3145 iommu_domain_free(dom); 3146 return ret; 3147 } 3148 } 3149 3150 /* 3151 * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be 3152 * mapped before their device is attached, in order to guarantee 3153 * continuity with any FW activity 3154 */ 3155 direct_failed = false; 3156 for_each_group_device(group, gdev) { 3157 if (iommu_create_device_direct_mappings(dom, gdev->dev)) { 3158 direct_failed = true; 3159 dev_warn_once( 3160 gdev->dev->iommu->iommu_dev->dev, 3161 "IOMMU driver was not able to establish FW requested direct mapping."); 3162 } 3163 } 3164 3165 /* We must set default_domain early for __iommu_device_set_domain */ 3166 group->default_domain = dom; 3167 if (!group->domain) { 3168 /* 3169 * Drivers are not allowed to fail the first domain attach. 3170 * The only way to recover from this is to fail attaching the 3171 * iommu driver and call ops->release_device. Put the domain 3172 * in group->default_domain so it is freed after. 3173 */ 3174 ret = __iommu_group_set_domain_internal( 3175 group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 3176 if (WARN_ON(ret)) 3177 goto out_free_old; 3178 } else { 3179 ret = __iommu_group_set_domain(group, dom); 3180 if (ret) 3181 goto err_restore_def_domain; 3182 } 3183 3184 /* 3185 * Drivers are supposed to allow mappings to be installed in a domain 3186 * before device attachment, but some don't. Hack around this defect by 3187 * trying again after attaching. If this happens it means the device 3188 * will not continuously have the IOMMU_RESV_DIRECT map. 3189 */ 3190 if (direct_failed) { 3191 for_each_group_device(group, gdev) { 3192 ret = iommu_create_device_direct_mappings(dom, gdev->dev); 3193 if (ret) 3194 goto err_restore_domain; 3195 } 3196 } 3197 3198out_free_old: 3199 if (old_dom) 3200 iommu_domain_free(old_dom); 3201 return ret; 3202 3203err_restore_domain: 3204 if (old_dom) 3205 __iommu_group_set_domain_internal( 3206 group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 3207err_restore_def_domain: 3208 if (old_dom) { 3209 iommu_domain_free(dom); 3210 group->default_domain = old_dom; 3211 } 3212 return ret; 3213} 3214 3215/* 3216 * Changing the default domain through sysfs requires the users to unbind the 3217 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 3218 * transition. Return failure if this isn't met. 3219 * 3220 * We need to consider the race between this and the device release path. 3221 * group->mutex is used here to guarantee that the device release path 3222 * will not be entered at the same time. 3223 */ 3224static ssize_t iommu_group_store_type(struct iommu_group *group, 3225 const char *buf, size_t count) 3226{ 3227 struct group_device *gdev; 3228 int ret, req_type; 3229 3230 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 3231 return -EACCES; 3232 3233 if (WARN_ON(!group) || !group->default_domain) 3234 return -EINVAL; 3235 3236 if (sysfs_streq(buf, "identity")) 3237 req_type = IOMMU_DOMAIN_IDENTITY; 3238 else if (sysfs_streq(buf, "DMA")) 3239 req_type = IOMMU_DOMAIN_DMA; 3240 else if (sysfs_streq(buf, "DMA-FQ")) 3241 req_type = IOMMU_DOMAIN_DMA_FQ; 3242 else if (sysfs_streq(buf, "auto")) 3243 req_type = 0; 3244 else 3245 return -EINVAL; 3246 3247 mutex_lock(&group->mutex); 3248 /* We can bring up a flush queue without tearing down the domain. */ 3249 if (req_type == IOMMU_DOMAIN_DMA_FQ && 3250 group->default_domain->type == IOMMU_DOMAIN_DMA) { 3251 ret = iommu_dma_init_fq(group->default_domain); 3252 if (ret) 3253 goto out_unlock; 3254 3255 group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; 3256 ret = count; 3257 goto out_unlock; 3258 } 3259 3260 /* Otherwise, ensure that device exists and no driver is bound. */ 3261 if (list_empty(&group->devices) || group->owner_cnt) { 3262 ret = -EPERM; 3263 goto out_unlock; 3264 } 3265 3266 ret = iommu_setup_default_domain(group, req_type); 3267 if (ret) 3268 goto out_unlock; 3269 3270 /* Make sure dma_ops is appropriatley set */ 3271 for_each_group_device(group, gdev) 3272 iommu_setup_dma_ops(gdev->dev, group->default_domain); 3273 3274out_unlock: 3275 mutex_unlock(&group->mutex); 3276 return ret ?: count; 3277} 3278 3279/** 3280 * iommu_device_use_default_domain() - Device driver wants to handle device 3281 * DMA through the kernel DMA API. 3282 * @dev: The device. 3283 * 3284 * The device driver about to bind @dev wants to do DMA through the kernel 3285 * DMA API. Return 0 if it is allowed, otherwise an error. 3286 */ 3287int iommu_device_use_default_domain(struct device *dev) 3288{ 3289 /* Caller is the driver core during the pre-probe path */ 3290 struct iommu_group *group = dev->iommu_group; 3291 int ret = 0; 3292 3293 if (!group) 3294 return 0; 3295 3296 mutex_lock(&group->mutex); 3297 /* We may race against bus_iommu_probe() finalising groups here */ 3298 if (!group->default_domain) { 3299 ret = -EPROBE_DEFER; 3300 goto unlock_out; 3301 } 3302 if (group->owner_cnt) { 3303 if (group->domain != group->default_domain || group->owner || 3304 !xa_empty(&group->pasid_array)) { 3305 ret = -EBUSY; 3306 goto unlock_out; 3307 } 3308 } 3309 3310 group->owner_cnt++; 3311 3312unlock_out: 3313 mutex_unlock(&group->mutex); 3314 return ret; 3315} 3316 3317/** 3318 * iommu_device_unuse_default_domain() - Device driver stops handling device 3319 * DMA through the kernel DMA API. 3320 * @dev: The device. 3321 * 3322 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3323 * It must be called after iommu_device_use_default_domain(). 3324 */ 3325void iommu_device_unuse_default_domain(struct device *dev) 3326{ 3327 /* Caller is the driver core during the post-probe path */ 3328 struct iommu_group *group = dev->iommu_group; 3329 3330 if (!group) 3331 return; 3332 3333 mutex_lock(&group->mutex); 3334 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3335 group->owner_cnt--; 3336 3337 mutex_unlock(&group->mutex); 3338} 3339 3340static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3341{ 3342 struct device *dev = iommu_group_first_dev(group); 3343 const struct iommu_ops *ops = dev_iommu_ops(dev); 3344 struct iommu_domain *domain; 3345 3346 if (group->blocking_domain) 3347 return 0; 3348 3349 if (ops->blocked_domain) { 3350 group->blocking_domain = ops->blocked_domain; 3351 return 0; 3352 } 3353 3354 /* 3355 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED create an 3356 * empty PAGING domain instead. 3357 */ 3358 domain = iommu_paging_domain_alloc(dev); 3359 if (IS_ERR(domain)) 3360 return PTR_ERR(domain); 3361 group->blocking_domain = domain; 3362 return 0; 3363} 3364 3365static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3366{ 3367 int ret; 3368 3369 if ((group->domain && group->domain != group->default_domain) || 3370 !xa_empty(&group->pasid_array)) 3371 return -EBUSY; 3372 3373 ret = __iommu_group_alloc_blocking_domain(group); 3374 if (ret) 3375 return ret; 3376 ret = __iommu_group_set_domain(group, group->blocking_domain); 3377 if (ret) 3378 return ret; 3379 3380 group->owner = owner; 3381 group->owner_cnt++; 3382 return 0; 3383} 3384 3385/** 3386 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3387 * @group: The group. 3388 * @owner: Caller specified pointer. Used for exclusive ownership. 3389 * 3390 * This is to support backward compatibility for vfio which manages the dma 3391 * ownership in iommu_group level. New invocations on this interface should be 3392 * prohibited. Only a single owner may exist for a group. 3393 */ 3394int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3395{ 3396 int ret = 0; 3397 3398 if (WARN_ON(!owner)) 3399 return -EINVAL; 3400 3401 mutex_lock(&group->mutex); 3402 if (group->owner_cnt) { 3403 ret = -EPERM; 3404 goto unlock_out; 3405 } 3406 3407 ret = __iommu_take_dma_ownership(group, owner); 3408unlock_out: 3409 mutex_unlock(&group->mutex); 3410 3411 return ret; 3412} 3413EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3414 3415/** 3416 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3417 * @dev: The device. 3418 * @owner: Caller specified pointer. Used for exclusive ownership. 3419 * 3420 * Claim the DMA ownership of a device. Multiple devices in the same group may 3421 * concurrently claim ownership if they present the same owner value. Returns 0 3422 * on success and error code on failure 3423 */ 3424int iommu_device_claim_dma_owner(struct device *dev, void *owner) 3425{ 3426 /* Caller must be a probed driver on dev */ 3427 struct iommu_group *group = dev->iommu_group; 3428 int ret = 0; 3429 3430 if (WARN_ON(!owner)) 3431 return -EINVAL; 3432 3433 if (!group) 3434 return -ENODEV; 3435 3436 mutex_lock(&group->mutex); 3437 if (group->owner_cnt) { 3438 if (group->owner != owner) { 3439 ret = -EPERM; 3440 goto unlock_out; 3441 } 3442 group->owner_cnt++; 3443 goto unlock_out; 3444 } 3445 3446 ret = __iommu_take_dma_ownership(group, owner); 3447unlock_out: 3448 mutex_unlock(&group->mutex); 3449 return ret; 3450} 3451EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); 3452 3453static void __iommu_release_dma_ownership(struct iommu_group *group) 3454{ 3455 if (WARN_ON(!group->owner_cnt || !group->owner || 3456 !xa_empty(&group->pasid_array))) 3457 return; 3458 3459 group->owner_cnt = 0; 3460 group->owner = NULL; 3461 __iommu_group_set_domain_nofail(group, group->default_domain); 3462} 3463 3464/** 3465 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3466 * @group: The group 3467 * 3468 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3469 */ 3470void iommu_group_release_dma_owner(struct iommu_group *group) 3471{ 3472 mutex_lock(&group->mutex); 3473 __iommu_release_dma_ownership(group); 3474 mutex_unlock(&group->mutex); 3475} 3476EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3477 3478/** 3479 * iommu_device_release_dma_owner() - Release DMA ownership of a device 3480 * @dev: The device. 3481 * 3482 * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 3483 */ 3484void iommu_device_release_dma_owner(struct device *dev) 3485{ 3486 /* Caller must be a probed driver on dev */ 3487 struct iommu_group *group = dev->iommu_group; 3488 3489 mutex_lock(&group->mutex); 3490 if (group->owner_cnt > 1) 3491 group->owner_cnt--; 3492 else 3493 __iommu_release_dma_ownership(group); 3494 mutex_unlock(&group->mutex); 3495} 3496EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); 3497 3498/** 3499 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3500 * @group: The group. 3501 * 3502 * This provides status query on a given group. It is racy and only for 3503 * non-binding status reporting. 3504 */ 3505bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3506{ 3507 unsigned int user; 3508 3509 mutex_lock(&group->mutex); 3510 user = group->owner_cnt; 3511 mutex_unlock(&group->mutex); 3512 3513 return user; 3514} 3515EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3516 3517static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, 3518 struct iommu_domain *domain) 3519{ 3520 const struct iommu_ops *ops = dev_iommu_ops(dev); 3521 struct iommu_domain *blocked_domain = ops->blocked_domain; 3522 3523 WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain, 3524 dev, pasid, domain)); 3525} 3526 3527static int __iommu_set_group_pasid(struct iommu_domain *domain, 3528 struct iommu_group *group, ioasid_t pasid, 3529 struct iommu_domain *old) 3530{ 3531 struct group_device *device, *last_gdev; 3532 int ret; 3533 3534 for_each_group_device(group, device) { 3535 if (device->dev->iommu->max_pasids > 0) { 3536 ret = domain->ops->set_dev_pasid(domain, device->dev, 3537 pasid, old); 3538 if (ret) 3539 goto err_revert; 3540 } 3541 } 3542 3543 return 0; 3544 3545err_revert: 3546 last_gdev = device; 3547 for_each_group_device(group, device) { 3548 if (device == last_gdev) 3549 break; 3550 if (device->dev->iommu->max_pasids > 0) { 3551 /* 3552 * If no old domain, undo the succeeded devices/pasid. 3553 * Otherwise, rollback the succeeded devices/pasid to 3554 * the old domain. And it is a driver bug to fail 3555 * attaching with a previously good domain. 3556 */ 3557 if (!old || 3558 WARN_ON(old->ops->set_dev_pasid(old, device->dev, 3559 pasid, domain))) 3560 iommu_remove_dev_pasid(device->dev, pasid, domain); 3561 } 3562 } 3563 return ret; 3564} 3565 3566static void __iommu_remove_group_pasid(struct iommu_group *group, 3567 ioasid_t pasid, 3568 struct iommu_domain *domain) 3569{ 3570 struct group_device *device; 3571 3572 for_each_group_device(group, device) { 3573 if (device->dev->iommu->max_pasids > 0) 3574 iommu_remove_dev_pasid(device->dev, pasid, domain); 3575 } 3576} 3577 3578/* 3579 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3580 * @domain: the iommu domain. 3581 * @dev: the attached device. 3582 * @pasid: the pasid of the device. 3583 * @handle: the attach handle. 3584 * 3585 * Caller should always provide a new handle to avoid race with the paths 3586 * that have lockless reference to handle if it intends to pass a valid handle. 3587 * 3588 * Return: 0 on success, or an error. 3589 */ 3590int iommu_attach_device_pasid(struct iommu_domain *domain, 3591 struct device *dev, ioasid_t pasid, 3592 struct iommu_attach_handle *handle) 3593{ 3594 /* Caller must be a probed driver on dev */ 3595 struct iommu_group *group = dev->iommu_group; 3596 struct group_device *device; 3597 const struct iommu_ops *ops; 3598 void *entry; 3599 int ret; 3600 3601 if (!group) 3602 return -ENODEV; 3603 3604 ops = dev_iommu_ops(dev); 3605 3606 if (!domain->ops->set_dev_pasid || 3607 !ops->blocked_domain || 3608 !ops->blocked_domain->ops->set_dev_pasid) 3609 return -EOPNOTSUPP; 3610 3611 if (!domain_iommu_ops_compatible(ops, domain) || 3612 pasid == IOMMU_NO_PASID) 3613 return -EINVAL; 3614 3615 mutex_lock(&group->mutex); 3616 3617 /* 3618 * This is a concurrent attach during a device reset. Reject it until 3619 * pci_dev_reset_iommu_done() attaches the device to group->domain. 3620 */ 3621 if (group->resetting_domain) { 3622 ret = -EBUSY; 3623 goto out_unlock; 3624 } 3625 3626 for_each_group_device(group, device) { 3627 /* 3628 * Skip PASID validation for devices without PASID support 3629 * (max_pasids = 0). These devices cannot issue transactions 3630 * with PASID, so they don't affect group's PASID usage. 3631 */ 3632 if ((device->dev->iommu->max_pasids > 0) && 3633 (pasid >= device->dev->iommu->max_pasids)) { 3634 ret = -EINVAL; 3635 goto out_unlock; 3636 } 3637 } 3638 3639 entry = iommu_make_pasid_array_entry(domain, handle); 3640 3641 /* 3642 * Entry present is a failure case. Use xa_insert() instead of 3643 * xa_reserve(). 3644 */ 3645 ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL); 3646 if (ret) 3647 goto out_unlock; 3648 3649 ret = __iommu_set_group_pasid(domain, group, pasid, NULL); 3650 if (ret) { 3651 xa_release(&group->pasid_array, pasid); 3652 goto out_unlock; 3653 } 3654 3655 /* 3656 * The xa_insert() above reserved the memory, and the group->mutex is 3657 * held, this cannot fail. The new domain cannot be visible until the 3658 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3659 * queued and then failing attach. 3660 */ 3661 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3662 pasid, entry, GFP_KERNEL))); 3663 3664out_unlock: 3665 mutex_unlock(&group->mutex); 3666 return ret; 3667} 3668EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3669 3670/** 3671 * iommu_replace_device_pasid - Replace the domain that a specific pasid 3672 * of the device is attached to 3673 * @domain: the new iommu domain 3674 * @dev: the attached device. 3675 * @pasid: the pasid of the device. 3676 * @handle: the attach handle. 3677 * 3678 * This API allows the pasid to switch domains. The @pasid should have been 3679 * attached. Otherwise, this fails. The pasid will keep the old configuration 3680 * if replacement failed. 3681 * 3682 * Caller should always provide a new handle to avoid race with the paths 3683 * that have lockless reference to handle if it intends to pass a valid handle. 3684 * 3685 * Return 0 on success, or an error. 3686 */ 3687int iommu_replace_device_pasid(struct iommu_domain *domain, 3688 struct device *dev, ioasid_t pasid, 3689 struct iommu_attach_handle *handle) 3690{ 3691 /* Caller must be a probed driver on dev */ 3692 struct iommu_group *group = dev->iommu_group; 3693 struct iommu_attach_handle *entry; 3694 struct iommu_domain *curr_domain; 3695 void *curr; 3696 int ret; 3697 3698 if (!group) 3699 return -ENODEV; 3700 3701 if (!domain->ops->set_dev_pasid) 3702 return -EOPNOTSUPP; 3703 3704 if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) || 3705 pasid == IOMMU_NO_PASID || !handle) 3706 return -EINVAL; 3707 3708 mutex_lock(&group->mutex); 3709 3710 /* 3711 * This is a concurrent attach during a device reset. Reject it until 3712 * pci_dev_reset_iommu_done() attaches the device to group->domain. 3713 */ 3714 if (group->resetting_domain) { 3715 ret = -EBUSY; 3716 goto out_unlock; 3717 } 3718 3719 entry = iommu_make_pasid_array_entry(domain, handle); 3720 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, 3721 XA_ZERO_ENTRY, GFP_KERNEL); 3722 if (xa_is_err(curr)) { 3723 ret = xa_err(curr); 3724 goto out_unlock; 3725 } 3726 3727 /* 3728 * No domain (with or without handle) attached, hence not 3729 * a replace case. 3730 */ 3731 if (!curr) { 3732 xa_release(&group->pasid_array, pasid); 3733 ret = -EINVAL; 3734 goto out_unlock; 3735 } 3736 3737 /* 3738 * Reusing handle is problematic as there are paths that refers 3739 * the handle without lock. To avoid race, reject the callers that 3740 * attempt it. 3741 */ 3742 if (curr == entry) { 3743 WARN_ON(1); 3744 ret = -EINVAL; 3745 goto out_unlock; 3746 } 3747 3748 curr_domain = pasid_array_entry_to_domain(curr); 3749 ret = 0; 3750 3751 if (curr_domain != domain) { 3752 ret = __iommu_set_group_pasid(domain, group, 3753 pasid, curr_domain); 3754 if (ret) 3755 goto out_unlock; 3756 } 3757 3758 /* 3759 * The above xa_cmpxchg() reserved the memory, and the 3760 * group->mutex is held, this cannot fail. 3761 */ 3762 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3763 pasid, entry, GFP_KERNEL))); 3764 3765out_unlock: 3766 mutex_unlock(&group->mutex); 3767 return ret; 3768} 3769EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL"); 3770 3771/* 3772 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3773 * @domain: the iommu domain. 3774 * @dev: the attached device. 3775 * @pasid: the pasid of the device. 3776 * 3777 * The @domain must have been attached to @pasid of the @dev with 3778 * iommu_attach_device_pasid(). 3779 */ 3780void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3781 ioasid_t pasid) 3782{ 3783 /* Caller must be a probed driver on dev */ 3784 struct iommu_group *group = dev->iommu_group; 3785 3786 mutex_lock(&group->mutex); 3787 __iommu_remove_group_pasid(group, pasid, domain); 3788 xa_erase(&group->pasid_array, pasid); 3789 mutex_unlock(&group->mutex); 3790} 3791EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3792 3793ioasid_t iommu_alloc_global_pasid(struct device *dev) 3794{ 3795 int ret; 3796 3797 /* max_pasids == 0 means that the device does not support PASID */ 3798 if (!dev->iommu->max_pasids) 3799 return IOMMU_PASID_INVALID; 3800 3801 /* 3802 * max_pasids is set up by vendor driver based on number of PASID bits 3803 * supported but the IDA allocation is inclusive. 3804 */ 3805 ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID, 3806 dev->iommu->max_pasids - 1, GFP_KERNEL); 3807 return ret < 0 ? IOMMU_PASID_INVALID : ret; 3808} 3809EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid); 3810 3811void iommu_free_global_pasid(ioasid_t pasid) 3812{ 3813 if (WARN_ON(pasid == IOMMU_PASID_INVALID)) 3814 return; 3815 3816 ida_free(&iommu_global_pasid_ida, pasid); 3817} 3818EXPORT_SYMBOL_GPL(iommu_free_global_pasid); 3819 3820/** 3821 * iommu_attach_handle_get - Return the attach handle 3822 * @group: the iommu group that domain was attached to 3823 * @pasid: the pasid within the group 3824 * @type: matched domain type, 0 for any match 3825 * 3826 * Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch. 3827 * 3828 * Return the attach handle to the caller. The life cycle of an iommu attach 3829 * handle is from the time when the domain is attached to the time when the 3830 * domain is detached. Callers are required to synchronize the call of 3831 * iommu_attach_handle_get() with domain attachment and detachment. The attach 3832 * handle can only be used during its life cycle. 3833 */ 3834struct iommu_attach_handle * 3835iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) 3836{ 3837 struct iommu_attach_handle *handle; 3838 void *entry; 3839 3840 xa_lock(&group->pasid_array); 3841 entry = xa_load(&group->pasid_array, pasid); 3842 if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) { 3843 handle = ERR_PTR(-ENOENT); 3844 } else { 3845 handle = xa_untag_pointer(entry); 3846 if (type && handle->domain->type != type) 3847 handle = ERR_PTR(-EBUSY); 3848 } 3849 xa_unlock(&group->pasid_array); 3850 3851 return handle; 3852} 3853EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL"); 3854 3855/** 3856 * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group 3857 * @domain: IOMMU domain to attach 3858 * @group: IOMMU group that will be attached 3859 * @handle: attach handle 3860 * 3861 * Returns 0 on success and error code on failure. 3862 * 3863 * This is a variant of iommu_attach_group(). It allows the caller to provide 3864 * an attach handle and use it when the domain is attached. This is currently 3865 * used by IOMMUFD to deliver the I/O page faults. 3866 * 3867 * Caller should always provide a new handle to avoid race with the paths 3868 * that have lockless reference to handle. 3869 */ 3870int iommu_attach_group_handle(struct iommu_domain *domain, 3871 struct iommu_group *group, 3872 struct iommu_attach_handle *handle) 3873{ 3874 void *entry; 3875 int ret; 3876 3877 if (!handle) 3878 return -EINVAL; 3879 3880 mutex_lock(&group->mutex); 3881 entry = iommu_make_pasid_array_entry(domain, handle); 3882 ret = xa_insert(&group->pasid_array, 3883 IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL); 3884 if (ret) 3885 goto out_unlock; 3886 3887 ret = __iommu_attach_group(domain, group); 3888 if (ret) { 3889 xa_release(&group->pasid_array, IOMMU_NO_PASID); 3890 goto out_unlock; 3891 } 3892 3893 /* 3894 * The xa_insert() above reserved the memory, and the group->mutex is 3895 * held, this cannot fail. The new domain cannot be visible until the 3896 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3897 * queued and then failing attach. 3898 */ 3899 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3900 IOMMU_NO_PASID, entry, GFP_KERNEL))); 3901 3902out_unlock: 3903 mutex_unlock(&group->mutex); 3904 return ret; 3905} 3906EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, "IOMMUFD_INTERNAL"); 3907 3908/** 3909 * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group 3910 * @domain: IOMMU domain to attach 3911 * @group: IOMMU group that will be attached 3912 * 3913 * Detach the specified IOMMU domain from the specified IOMMU group. 3914 * It must be used in conjunction with iommu_attach_group_handle(). 3915 */ 3916void iommu_detach_group_handle(struct iommu_domain *domain, 3917 struct iommu_group *group) 3918{ 3919 mutex_lock(&group->mutex); 3920 __iommu_group_set_core_domain(group); 3921 xa_erase(&group->pasid_array, IOMMU_NO_PASID); 3922 mutex_unlock(&group->mutex); 3923} 3924EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL"); 3925 3926/** 3927 * iommu_replace_group_handle - replace the domain that a group is attached to 3928 * @group: IOMMU group that will be attached to the new domain 3929 * @new_domain: new IOMMU domain to replace with 3930 * @handle: attach handle 3931 * 3932 * This API allows the group to switch domains without being forced to go to 3933 * the blocking domain in-between. It allows the caller to provide an attach 3934 * handle for the new domain and use it when the domain is attached. 3935 * 3936 * If the currently attached domain is a core domain (e.g. a default_domain), 3937 * it will act just like the iommu_attach_group_handle(). 3938 * 3939 * Caller should always provide a new handle to avoid race with the paths 3940 * that have lockless reference to handle. 3941 */ 3942int iommu_replace_group_handle(struct iommu_group *group, 3943 struct iommu_domain *new_domain, 3944 struct iommu_attach_handle *handle) 3945{ 3946 void *curr, *entry; 3947 int ret; 3948 3949 if (!new_domain || !handle) 3950 return -EINVAL; 3951 3952 mutex_lock(&group->mutex); 3953 entry = iommu_make_pasid_array_entry(new_domain, handle); 3954 ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); 3955 if (ret) 3956 goto err_unlock; 3957 3958 ret = __iommu_group_set_domain(group, new_domain); 3959 if (ret) 3960 goto err_release; 3961 3962 curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL); 3963 WARN_ON(xa_is_err(curr)); 3964 3965 mutex_unlock(&group->mutex); 3966 3967 return 0; 3968err_release: 3969 xa_release(&group->pasid_array, IOMMU_NO_PASID); 3970err_unlock: 3971 mutex_unlock(&group->mutex); 3972 return ret; 3973} 3974EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL"); 3975 3976/** 3977 * pci_dev_reset_iommu_prepare() - Block IOMMU to prepare for a PCI device reset 3978 * @pdev: PCI device that is going to enter a reset routine 3979 * 3980 * The PCIe r6.0, sec 10.3.1 IMPLEMENTATION NOTE recommends to disable and block 3981 * ATS before initiating a reset. This means that a PCIe device during the reset 3982 * routine wants to block any IOMMU activity: translation and ATS invalidation. 3983 * 3984 * This function attaches the device's RID/PASID(s) the group->blocking_domain, 3985 * setting the group->resetting_domain. This allows the IOMMU driver pausing any 3986 * IOMMU activity while leaving the group->domain pointer intact. Later when the 3987 * reset is finished, pci_dev_reset_iommu_done() can restore everything. 3988 * 3989 * Caller must use pci_dev_reset_iommu_prepare() with pci_dev_reset_iommu_done() 3990 * before/after the core-level reset routine, to unset the resetting_domain. 3991 * 3992 * Return: 0 on success or negative error code if the preparation failed. 3993 * 3994 * These two functions are designed to be used by PCI reset functions that would 3995 * not invoke any racy iommu_release_device(), since PCI sysfs node gets removed 3996 * before it notifies with a BUS_NOTIFY_REMOVED_DEVICE. When using them in other 3997 * case, callers must ensure there will be no racy iommu_release_device() call, 3998 * which otherwise would UAF the dev->iommu_group pointer. 3999 */ 4000int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) 4001{ 4002 struct iommu_group *group = pdev->dev.iommu_group; 4003 unsigned long pasid; 4004 void *entry; 4005 int ret; 4006 4007 if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev)) 4008 return 0; 4009 4010 guard(mutex)(&group->mutex); 4011 4012 /* Re-entry is not allowed */ 4013 if (WARN_ON(group->resetting_domain)) 4014 return -EBUSY; 4015 4016 ret = __iommu_group_alloc_blocking_domain(group); 4017 if (ret) 4018 return ret; 4019 4020 /* Stage RID domain at blocking_domain while retaining group->domain */ 4021 if (group->domain != group->blocking_domain) { 4022 ret = __iommu_attach_device(group->blocking_domain, &pdev->dev, 4023 group->domain); 4024 if (ret) 4025 return ret; 4026 } 4027 4028 /* 4029 * Stage PASID domains at blocking_domain while retaining pasid_array. 4030 * 4031 * The pasid_array is mostly fenced by group->mutex, except one reader 4032 * in iommu_attach_handle_get(), so it's safe to read without xa_lock. 4033 */ 4034 xa_for_each_start(&group->pasid_array, pasid, entry, 1) 4035 iommu_remove_dev_pasid(&pdev->dev, pasid, 4036 pasid_array_entry_to_domain(entry)); 4037 4038 group->resetting_domain = group->blocking_domain; 4039 return ret; 4040} 4041EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare); 4042 4043/** 4044 * pci_dev_reset_iommu_done() - Restore IOMMU after a PCI device reset is done 4045 * @pdev: PCI device that has finished a reset routine 4046 * 4047 * After a PCIe device finishes a reset routine, it wants to restore its IOMMU 4048 * IOMMU activity, including new translation as well as cache invalidation, by 4049 * re-attaching all RID/PASID of the device's back to the domains retained in 4050 * the core-level structure. 4051 * 4052 * Caller must pair it with a successful pci_dev_reset_iommu_prepare(). 4053 * 4054 * Note that, although unlikely, there is a risk that re-attaching domains might 4055 * fail due to some unexpected happening like OOM. 4056 */ 4057void pci_dev_reset_iommu_done(struct pci_dev *pdev) 4058{ 4059 struct iommu_group *group = pdev->dev.iommu_group; 4060 unsigned long pasid; 4061 void *entry; 4062 4063 if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev)) 4064 return; 4065 4066 guard(mutex)(&group->mutex); 4067 4068 /* pci_dev_reset_iommu_prepare() was bypassed for the device */ 4069 if (!group->resetting_domain) 4070 return; 4071 4072 /* pci_dev_reset_iommu_prepare() was not successfully called */ 4073 if (WARN_ON(!group->blocking_domain)) 4074 return; 4075 4076 /* Re-attach RID domain back to group->domain */ 4077 if (group->domain != group->blocking_domain) { 4078 WARN_ON(__iommu_attach_device(group->domain, &pdev->dev, 4079 group->blocking_domain)); 4080 } 4081 4082 /* 4083 * Re-attach PASID domains back to the domains retained in pasid_array. 4084 * 4085 * The pasid_array is mostly fenced by group->mutex, except one reader 4086 * in iommu_attach_handle_get(), so it's safe to read without xa_lock. 4087 */ 4088 xa_for_each_start(&group->pasid_array, pasid, entry, 1) 4089 WARN_ON(__iommu_set_group_pasid( 4090 pasid_array_entry_to_domain(entry), group, pasid, 4091 group->blocking_domain)); 4092 4093 group->resetting_domain = NULL; 4094} 4095EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_done); 4096 4097#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) 4098/** 4099 * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain 4100 * @desc: MSI descriptor, will store the MSI page 4101 * @msi_addr: MSI target address to be mapped 4102 * 4103 * The implementation of sw_msi() should take msi_addr and map it to 4104 * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the 4105 * mapping information. 4106 * 4107 * Return: 0 on success or negative error code if the mapping failed. 4108 */ 4109int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) 4110{ 4111 struct device *dev = msi_desc_to_dev(desc); 4112 struct iommu_group *group = dev->iommu_group; 4113 int ret = 0; 4114 4115 if (!group) 4116 return 0; 4117 4118 mutex_lock(&group->mutex); 4119 /* An IDENTITY domain must pass through */ 4120 if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) { 4121 switch (group->domain->cookie_type) { 4122 case IOMMU_COOKIE_DMA_MSI: 4123 case IOMMU_COOKIE_DMA_IOVA: 4124 ret = iommu_dma_sw_msi(group->domain, desc, msi_addr); 4125 break; 4126 case IOMMU_COOKIE_IOMMUFD: 4127 ret = iommufd_sw_msi(group->domain, desc, msi_addr); 4128 break; 4129 default: 4130 ret = -EOPNOTSUPP; 4131 break; 4132 } 4133 } 4134 mutex_unlock(&group->mutex); 4135 return ret; 4136} 4137#endif /* CONFIG_IRQ_MSI_IOMMU */