Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 *         Leo Duran <leo.duran@amd.com>
 */

#define pr_fmt(fmt)	"AMD-Vi: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <linux/syscore_ops.h>
#include <linux/interrupt.h>
#include <linux/msi.h>
#include <linux/irq.h>
#include <linux/amd-iommu.h>
#include <linux/export.h>
#include <linux/kmemleak.h>
#include <linux/cc_platform.h>
#include <linux/iopoll.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/apic.h>
#include <asm/gart.h>
#include <asm/x86_init.h>
#include <asm/io_apic.h>
#include <asm/irq_remapping.h>
#include <asm/set_memory.h>
#include <asm/sev.h>

#include <linux/crash_dump.h>

#include "amd_iommu.h"
#include "../irq_remapping.h"
#include "../iommu-pages.h"

/*
 * definitions for the ACPI scanning code
 */
#define IVRS_HEADER_LENGTH		48

#define ACPI_IVHD_TYPE_MAX_SUPPORTED	0x40
#define ACPI_IVMD_TYPE_ALL		0x20
#define ACPI_IVMD_TYPE			0x21
#define ACPI_IVMD_TYPE_RANGE		0x22

#define IVHD_DEV_ALL			0x01
#define IVHD_DEV_SELECT			0x02
#define IVHD_DEV_SELECT_RANGE_START	0x03
#define IVHD_DEV_RANGE_END		0x04
#define IVHD_DEV_ALIAS			0x42
#define IVHD_DEV_ALIAS_RANGE		0x43
#define IVHD_DEV_EXT_SELECT		0x46
#define IVHD_DEV_EXT_SELECT_RANGE	0x47
#define IVHD_DEV_SPECIAL		0x48
#define IVHD_DEV_ACPI_HID		0xf0

#define UID_NOT_PRESENT			0
#define UID_IS_INTEGER			1
#define UID_IS_CHARACTER		2

#define IVHD_SPECIAL_IOAPIC		1
#define IVHD_SPECIAL_HPET		2

#define IVHD_FLAG_HT_TUN_EN_MASK	0x01
#define IVHD_FLAG_PASSPW_EN_MASK	0x02
#define IVHD_FLAG_RESPASSPW_EN_MASK	0x04
#define IVHD_FLAG_ISOC_EN_MASK		0x08

#define IVMD_FLAG_EXCL_RANGE		0x08
#define IVMD_FLAG_IW			0x04
#define IVMD_FLAG_IR			0x02
#define IVMD_FLAG_UNITY_MAP		0x01

#define ACPI_DEVFLAG_INITPASS		0x01
#define ACPI_DEVFLAG_EXTINT		0x02
#define ACPI_DEVFLAG_NMI		0x04
#define ACPI_DEVFLAG_SYSMGT1		0x10
#define ACPI_DEVFLAG_SYSMGT2		0x20
#define ACPI_DEVFLAG_LINT0		0x40
#define ACPI_DEVFLAG_LINT1		0x80
#define ACPI_DEVFLAG_ATSDIS		0x10000000

#define IVRS_GET_SBDF_ID(seg, bus, dev, fn)	(((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
						 | ((dev & 0x1f) << 3) | (fn & 0x7))

/*
 * ACPI table definitions
 *
 * These data structures are laid over the table to parse the important values
 * out of it.
 */

/*
 * structure describing one IOMMU in the ACPI table. Typically followed by one
 * or more ivhd_entrys.
 */
struct ivhd_header {
	u8 type;
	u8 flags;
	u16 length;
	u16 devid;
	u16 cap_ptr;
	u64 mmio_phys;
	u16 pci_seg;
	u16 info;
	u32 efr_attr;

	/* Following only valid on IVHD type 11h and 40h */
	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
	u64 efr_reg2;
} __attribute__((packed));

/*
 * A device entry describing which devices a specific IOMMU translates and
 * which requestor ids they use.
 */
struct ivhd_entry {
	u8 type;
	u16 devid;
	u8 flags;
	struct_group(ext_hid,
		u32 ext;
		u32 hidh;
	);
	u64 cid;
	u8 uidf;
	u8 uidl;
	u8 uid;
} __attribute__((packed));

int amd_iommu_evtlog_size = EVTLOG_SIZE_DEF;
int amd_iommu_pprlog_size = PPRLOG_SIZE_DEF;

/*
 * An AMD IOMMU memory definition structure. It defines things like exclusion
 * ranges for devices and regions that should be unity mapped.
 */
struct ivmd_header {
	u8 type;
	u8 flags;
	u16 length;
	u16 devid;
	u16 aux;
	u16 pci_seg;
	u8 resv[6];
	u64 range_start;
	u64 range_length;
} __attribute__((packed));

bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;

enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1;
/* Host page table level */
u8 amd_iommu_hpt_level;
/* Guest page table level */
int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;

int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;

static bool amd_iommu_detected;
static bool amd_iommu_disabled __initdata;
static bool amd_iommu_force_enable __initdata;
static bool amd_iommu_irtcachedis;
static int amd_iommu_target_ivhd_type;

/* Global EFR and EFR2 registers */
u64 amd_iommu_efr;
u64 amd_iommu_efr2;

/* Host (v1) page table is not supported*/
bool amd_iommu_hatdis;

/* SNP is enabled on the system? */
bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);

LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the system */
LIST_HEAD(amd_ivhd_dev_flags_list);	/* list of all IVHD device entry settings */

/* Number of IOMMUs present in the system */
static int amd_iommus_present;

/* IOMMUs have a non-present cache? */
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;

static bool amd_iommu_pc_present __read_mostly;
bool amdr_ivrs_remap_support __read_mostly;

bool amd_iommu_force_isolation __read_mostly;

unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES;

enum iommu_init_state {
	IOMMU_START_STATE,
	IOMMU_IVRS_DETECTED,
	IOMMU_ACPI_FINISHED,
	IOMMU_ENABLED,
	IOMMU_PCI_INIT,
	IOMMU_INTERRUPTS_EN,
	IOMMU_INITIALIZED,
	IOMMU_NOT_FOUND,
	IOMMU_INIT_ERROR,
	IOMMU_CMDLINE_DISABLED,
};

/* Early ioapic and hpet maps from kernel command line */
#define EARLY_MAP_SIZE		4
static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];

static int __initdata early_ioapic_map_size;
static int __initdata early_hpet_map_size;
static int __initdata early_acpihid_map_size;

static bool __initdata cmdline_maps;

static enum iommu_init_state init_state = IOMMU_START_STATE;

static int amd_iommu_enable_interrupts(void);
static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);

static bool amd_iommu_pre_enabled = true;

static u32 amd_iommu_ivinfo __initdata;

bool translation_pre_enabled(struct amd_iommu *iommu)
{
	return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct amd_iommu *iommu)
{
	iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct amd_iommu *iommu)
{
	u64 ctrl;

	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
	if (ctrl & (1<<CONTROL_IOMMU_EN))
		iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
}

int amd_iommu_get_num_iommus(void)
{
	return amd_iommus_present;
}

bool amd_iommu_ht_range_ignore(void)
{
	return check_feature2(FEATURE_HT_RANGE_IGNORE);
}

/*
 * Iterate through all the IOMMUs to get common EFR
 * masks among all IOMMUs and warn if found inconsistency.
 */
static __init void get_global_efr(void)
{
	struct amd_iommu *iommu;

	for_each_iommu(iommu) {
		u64 tmp = iommu->features;
		u64 tmp2 = iommu->features2;

		if (list_is_first(&iommu->list, &amd_iommu_list)) {
			amd_iommu_efr = tmp;
			amd_iommu_efr2 = tmp2;
			continue;
		}

		if (amd_iommu_efr == tmp &&
		    amd_iommu_efr2 == tmp2)
			continue;

		pr_err(FW_BUG
		       "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
		       tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
		       iommu->index, iommu->pci_seg->id,
		       PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
		       PCI_FUNC(iommu->devid));

		amd_iommu_efr &= tmp;
		amd_iommu_efr2 &= tmp2;
	}

	pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
}

/*
 * For IVHD type 0x11/0x40, EFR is also available via IVHD.
 * Default to IVHD EFR since it is available sooner
 * (i.e. before PCI init).
 */
static void __init early_iommu_features_init(struct amd_iommu *iommu,
					     struct ivhd_header *h)
{
	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
		iommu->features = h->efr_reg;
		iommu->features2 = h->efr_reg2;
	}
	if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
		amdr_ivrs_remap_support = true;
}

/* Access to l1 and l2 indexed register spaces */

static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
{
	u32 val;

	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
	pci_read_config_dword(iommu->dev, 0xfc, &val);
	return val;
}

static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
{
	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
	pci_write_config_dword(iommu->dev, 0xfc, val);
	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
}

static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
{
	u32 val;

	pci_write_config_dword(iommu->dev, 0xf0, address);
	pci_read_config_dword(iommu->dev, 0xf4, &val);
	return val;
}

static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
{
	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
	pci_write_config_dword(iommu->dev, 0xf4, val);
}

/****************************************************************************
 *
 * AMD IOMMU MMIO register space handling functions
 *
 * These functions are used to program the IOMMU device registers in
 * MMIO space required for that driver.
 *
 ****************************************************************************/

/*
 * This function set the exclusion range in the IOMMU. DMA accesses to the
 * exclusion range are passed through untranslated
 */
static void iommu_set_exclusion_range(struct amd_iommu *iommu)
{
	u64 start = iommu->exclusion_start & PAGE_MASK;
	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
	u64 entry;

	if (!iommu->exclusion_start)
		return;

	entry = start | MMIO_EXCL_ENABLE_MASK;
	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
		    &entry, sizeof(entry));

	entry = limit;
	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
		    &entry, sizeof(entry));
}

static void iommu_set_cwwb_range(struct amd_iommu *iommu)
{
	u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
	u64 entry = start & PM_ADDR_MASK;

	if (!check_feature(FEATURE_SNP))
		return;

	/* Note:
	 * Re-purpose Exclusion base/limit registers for Completion wait
	 * write-back base/limit.
	 */
	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
		    &entry, sizeof(entry));

	/* Note:
	 * Default to 4 Kbytes, which can be specified by setting base
	 * address equal to the limit address.
	 */
	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
		    &entry, sizeof(entry));
}

/* Programs the physical address of the device table into the IOMMU hardware */
static void iommu_set_device_table(struct amd_iommu *iommu)
{
	u64 entry;
	u32 dev_table_size = iommu->pci_seg->dev_table_size;
	void *dev_table = (void *)get_dev_table(iommu);

	BUG_ON(iommu->mmio_base == NULL);

	if (is_kdump_kernel())
		return;

	entry = iommu_virt_to_phys(dev_table);
	entry |= (dev_table_size >> 12) - 1;
	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
		    &entry, sizeof(entry));
}

static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift)
{
	u64 ctrl;

	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
	mask <<= shift;
	ctrl &= ~mask;
	ctrl |= (val << shift) & mask;
	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
}

/* Generic functions to enable/disable certain features of the IOMMU. */
void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
{
	iommu_feature_set(iommu, 1ULL, 1ULL, bit);
}

static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
{
	iommu_feature_set(iommu, 0ULL, 1ULL, bit);
}

/* Function to enable the hardware */
static void iommu_enable(struct amd_iommu *iommu)
{
	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
}

static void iommu_disable(struct amd_iommu *iommu)
{
	if (!iommu->mmio_base)
		return;

	/* Disable command buffer */
	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);

	/* Disable event logging and event interrupts */
	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);

	/* Disable IOMMU GA_LOG */
	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
	iommu_feature_disable(iommu, CONTROL_GAINT_EN);

	/* Disable IOMMU PPR logging */
	iommu_feature_disable(iommu, CONTROL_PPRLOG_EN);
	iommu_feature_disable(iommu, CONTROL_PPRINT_EN);

	/* Disable IOMMU hardware itself */
	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);

	/* Clear IRTE cache disabling bit */
	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
}

/*
 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
 * the system has one.
 */
static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
{
	if (!request_mem_region(address, end, "amd_iommu")) {
		pr_err("Can not reserve memory region %llx-%llx for mmio\n",
			address, end);
		pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
		return NULL;
	}

	return (u8 __iomem *)ioremap(address, end);
}

static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
{
	if (iommu->mmio_base)
		iounmap(iommu->mmio_base);
	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
}

static inline u32 get_ivhd_header_size(struct ivhd_header *h)
{
	u32 size = 0;

	switch (h->type) {
	case 0x10:
		size = 24;
		break;
	case 0x11:
	case 0x40:
		size = 40;
		break;
	}
	return size;
}

/****************************************************************************
 *
 * The functions below belong to the first pass of AMD IOMMU ACPI table
 * parsing. In this pass we try to find out the highest device id this
 * code has to handle. Upon this information the size of the shared data
 * structures is determined later.
 *
 ****************************************************************************/

/*
 * This function calculates the length of a given IVHD entry
 */
static inline int ivhd_entry_length(u8 *ivhd)
{
	u32 type = ((struct ivhd_entry *)ivhd)->type;

	if (type < 0x80) {
		return 0x04 << (*ivhd >> 6);
	} else if (type == IVHD_DEV_ACPI_HID) {
		/* For ACPI_HID, offset 21 is uid len */
		return *((u8 *)ivhd + 21) + 22;
	}
	return 0;
}

/*
 * After reading the highest device id from the IOMMU PCI capability header
 * this function looks if there is a higher device id defined in the ACPI table
 */
static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
{
	u8 *p = (void *)h, *end = (void *)h;
	struct ivhd_entry *dev;
	int last_devid = -EINVAL;

	u32 ivhd_size = get_ivhd_header_size(h);

	if (!ivhd_size) {
		pr_err("Unsupported IVHD type %#x\n", h->type);
		return -EINVAL;
	}

	p += ivhd_size;
	end += h->length;

	while (p < end) {
		dev = (struct ivhd_entry *)p;
		switch (dev->type) {
		case IVHD_DEV_ALL:
			/* Use maximum BDF value for DEV_ALL */
			return 0xffff;
		case IVHD_DEV_SELECT:
		case IVHD_DEV_RANGE_END:
		case IVHD_DEV_ALIAS:
		case IVHD_DEV_EXT_SELECT:
			/* all the above subfield types refer to device ids */
			if (dev->devid > last_devid)
				last_devid = dev->devid;
			break;
		default:
			break;
		}
		p += ivhd_entry_length(p);
	}

	WARN_ON(p != end);

	return last_devid;
}

static int __init check_ivrs_checksum(struct acpi_table_header *table)
{
	int i;
	u8 checksum = 0, *p = (u8 *)table;

	for (i = 0; i < table->length; ++i)
		checksum += p[i];
	if (checksum != 0) {
		/* ACPI table corrupt */
		pr_err(FW_BUG "IVRS invalid checksum\n");
		return -ENODEV;
	}

	return 0;
}

/*
 * Iterate over all IVHD entries in the ACPI table and find the highest device
 * id which we need to handle. This is the first of three functions which parse
 * the ACPI table. So we check the checksum here.
605 */ 606static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg) 607{ 608 u8 *p = (u8 *)table, *end = (u8 *)table; 609 struct ivhd_header *h; 610 int last_devid, last_bdf = 0; 611 612 p += IVRS_HEADER_LENGTH; 613 614 end += table->length; 615 while (p < end) { 616 h = (struct ivhd_header *)p; 617 if (h->pci_seg == pci_seg && 618 h->type == amd_iommu_target_ivhd_type) { 619 last_devid = find_last_devid_from_ivhd(h); 620 621 if (last_devid < 0) 622 return -EINVAL; 623 if (last_devid > last_bdf) 624 last_bdf = last_devid; 625 } 626 p += h->length; 627 } 628 WARN_ON(p != end); 629 630 return last_bdf; 631} 632 633/**************************************************************************** 634 * 635 * The following functions belong to the code path which parses the ACPI table 636 * the second time. In this ACPI parsing iteration we allocate IOMMU specific 637 * data structures, initialize the per PCI segment device/alias/rlookup table 638 * and also basically initialize the hardware. 639 * 640 ****************************************************************************/ 641 642/* Allocate per PCI segment device table */ 643static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) 644{ 645 pci_seg->dev_table = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32, 646 pci_seg->dev_table_size); 647 if (!pci_seg->dev_table) 648 return -ENOMEM; 649 650 return 0; 651} 652 653static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) 654{ 655 if (is_kdump_kernel()) 656 memunmap((void *)pci_seg->dev_table); 657 else 658 iommu_free_pages(pci_seg->dev_table); 659 pci_seg->dev_table = NULL; 660} 661 662/* Allocate per PCI segment IOMMU rlookup table. */ 663static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 664{ 665 pci_seg->rlookup_table = kvzalloc_objs(*pci_seg->rlookup_table, 666 pci_seg->last_bdf + 1); 667 if (pci_seg->rlookup_table == NULL) 668 return -ENOMEM; 669 670 return 0; 671} 672 673static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 674{ 675 kvfree(pci_seg->rlookup_table); 676 pci_seg->rlookup_table = NULL; 677} 678 679static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 680{ 681 pci_seg->irq_lookup_table = kvzalloc_objs(*pci_seg->irq_lookup_table, 682 pci_seg->last_bdf + 1); 683 if (pci_seg->irq_lookup_table == NULL) 684 return -ENOMEM; 685 686 return 0; 687} 688 689static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 690{ 691 kvfree(pci_seg->irq_lookup_table); 692 pci_seg->irq_lookup_table = NULL; 693} 694 695static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) 696{ 697 int i; 698 699 pci_seg->alias_table = kvmalloc_objs(*pci_seg->alias_table, 700 pci_seg->last_bdf + 1); 701 if (!pci_seg->alias_table) 702 return -ENOMEM; 703 704 /* 705 * let all alias entries point to itself 706 */ 707 for (i = 0; i <= pci_seg->last_bdf; ++i) 708 pci_seg->alias_table[i] = i; 709 710 return 0; 711} 712 713static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) 714{ 715 kvfree(pci_seg->alias_table); 716 pci_seg->alias_table = NULL; 717} 718 719static inline void *iommu_memremap(unsigned long paddr, size_t size) 720{ 721 phys_addr_t phys; 722 723 if (!paddr) 724 return NULL; 725 726 /* 727 * Obtain true physical address in kdump kernel when SME is enabled. 728 * Currently, previous kernel with SME enabled and kdump kernel 729 * with SME support disabled is not supported. 
730 */ 731 phys = __sme_clr(paddr); 732 733 if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) 734 return (__force void *)ioremap_encrypted(phys, size); 735 else 736 return memremap(phys, size, MEMREMAP_WB); 737} 738 739/* 740 * Allocates the command buffer. This buffer is per AMD IOMMU. We can 741 * write commands to that buffer later and the IOMMU will execute them 742 * asynchronously 743 */ 744static int __init alloc_command_buffer(struct amd_iommu *iommu) 745{ 746 iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE); 747 748 return iommu->cmd_buf ? 0 : -ENOMEM; 749} 750 751/* 752 * Interrupt handler has processed all pending events and adjusted head 753 * and tail pointer. Reset overflow mask and restart logging again. 754 */ 755void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, 756 u8 cntrl_intr, u8 cntrl_log, 757 u32 status_run_mask, u32 status_overflow_mask) 758{ 759 u32 status; 760 761 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 762 if (status & status_run_mask) 763 return; 764 765 pr_info_ratelimited("IOMMU %s log restarting\n", evt_type); 766 767 iommu_feature_disable(iommu, cntrl_log); 768 iommu_feature_disable(iommu, cntrl_intr); 769 770 writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET); 771 772 iommu_feature_enable(iommu, cntrl_intr); 773 iommu_feature_enable(iommu, cntrl_log); 774} 775 776/* 777 * This function restarts event logging in case the IOMMU experienced 778 * an event log buffer overflow. 779 */ 780void amd_iommu_restart_event_logging(struct amd_iommu *iommu) 781{ 782 amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN, 783 CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK, 784 MMIO_STATUS_EVT_OVERFLOW_MASK); 785} 786 787/* 788 * This function restarts event logging in case the IOMMU experienced 789 * GA log overflow. 790 */ 791void amd_iommu_restart_ga_log(struct amd_iommu *iommu) 792{ 793 amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN, 794 CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK, 795 MMIO_STATUS_GALOG_OVERFLOW_MASK); 796} 797 798/* 799 * This function resets the command buffer if the IOMMU stopped fetching 800 * commands from it. 801 */ 802static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) 803{ 804 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 805 806 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 807 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 808 iommu->cmd_buf_head = 0; 809 iommu->cmd_buf_tail = 0; 810 811 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 812} 813 814/* 815 * This function writes the command buffer address to the hardware and 816 * enables it. 817 */ 818static void iommu_enable_command_buffer(struct amd_iommu *iommu) 819{ 820 u64 entry; 821 822 BUG_ON(iommu->cmd_buf == NULL); 823 824 if (!is_kdump_kernel()) { 825 /* 826 * Command buffer is re-used for kdump kernel and setting 827 * of MMIO register is not required. 
828 */ 829 entry = iommu_virt_to_phys(iommu->cmd_buf); 830 entry |= MMIO_CMD_SIZE_512; 831 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, 832 &entry, sizeof(entry)); 833 } 834 835 amd_iommu_reset_cmd_buffer(iommu); 836} 837 838/* 839 * This function disables the command buffer 840 */ 841static void iommu_disable_command_buffer(struct amd_iommu *iommu) 842{ 843 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 844} 845 846static void __init free_command_buffer(struct amd_iommu *iommu) 847{ 848 iommu_free_pages(iommu->cmd_buf); 849} 850 851void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, 852 size_t size) 853{ 854 int nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE; 855 void *buf; 856 857 size = PAGE_ALIGN(size); 858 buf = iommu_alloc_pages_node_sz(nid, gfp, size); 859 if (!buf) 860 return NULL; 861 if (check_feature(FEATURE_SNP) && 862 set_memory_4k((unsigned long)buf, size / PAGE_SIZE)) { 863 iommu_free_pages(buf); 864 return NULL; 865 } 866 867 return buf; 868} 869 870/* allocates the memory where the IOMMU will log its events to */ 871static int __init alloc_event_buffer(void) 872{ 873 struct amd_iommu *iommu; 874 875 for_each_iommu(iommu) { 876 iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 877 amd_iommu_evtlog_size); 878 if (!iommu->evt_buf) 879 return -ENOMEM; 880 } 881 882 return 0; 883} 884 885static void iommu_enable_event_buffer(void) 886{ 887 struct amd_iommu *iommu; 888 u64 entry; 889 890 for_each_iommu(iommu) { 891 BUG_ON(iommu->evt_buf == NULL); 892 893 if (!is_kdump_kernel()) { 894 /* 895 * Event buffer is re-used for kdump kernel and setting 896 * of MMIO register is not required. 897 */ 898 entry = iommu_virt_to_phys(iommu->evt_buf); 899 entry |= (amd_iommu_evtlog_size == EVTLOG_SIZE_DEF) ? 
900 EVTLOG_LEN_MASK_DEF : EVTLOG_LEN_MASK_MAX; 901 902 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, 903 &entry, sizeof(entry)); 904 } 905 906 /* set head and tail to zero manually */ 907 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); 908 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); 909 910 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 911 } 912} 913 914/* 915 * This function disables the event log buffer 916 */ 917static void iommu_disable_event_buffer(struct amd_iommu *iommu) 918{ 919 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 920} 921 922static void __init free_event_buffer(struct amd_iommu *iommu) 923{ 924 iommu_free_pages(iommu->evt_buf); 925} 926 927static void free_ga_log(struct amd_iommu *iommu) 928{ 929#ifdef CONFIG_IRQ_REMAP 930 iommu_free_pages(iommu->ga_log); 931 iommu_free_pages(iommu->ga_log_tail); 932#endif 933} 934 935#ifdef CONFIG_IRQ_REMAP 936static int iommu_ga_log_enable(struct amd_iommu *iommu) 937{ 938 u32 status, i; 939 u64 entry; 940 941 if (!iommu->ga_log) 942 return -EINVAL; 943 944 entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; 945 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, 946 &entry, sizeof(entry)); 947 entry = (iommu_virt_to_phys(iommu->ga_log_tail) & 948 (BIT_ULL(52)-1)) & ~7ULL; 949 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, 950 &entry, sizeof(entry)); 951 writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); 952 writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); 953 954 955 iommu_feature_enable(iommu, CONTROL_GAINT_EN); 956 iommu_feature_enable(iommu, CONTROL_GALOG_EN); 957 958 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { 959 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 960 if (status & (MMIO_STATUS_GALOG_RUN_MASK)) 961 break; 962 udelay(10); 963 } 964 965 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) 966 return -EINVAL; 967 968 return 0; 969} 970 971static int iommu_init_ga_log(struct amd_iommu *iommu) 972{ 973 int nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE; 974 975 if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 976 return 0; 977 978 iommu->ga_log = iommu_alloc_pages_node_sz(nid, GFP_KERNEL, GA_LOG_SIZE); 979 if (!iommu->ga_log) 980 goto err_out; 981 982 iommu->ga_log_tail = iommu_alloc_pages_node_sz(nid, GFP_KERNEL, 8); 983 if (!iommu->ga_log_tail) 984 goto err_out; 985 986 return 0; 987err_out: 988 free_ga_log(iommu); 989 return -EINVAL; 990} 991#endif /* CONFIG_IRQ_REMAP */ 992 993static int __init alloc_cwwb_sem(struct amd_iommu *iommu) 994{ 995 iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1); 996 if (!iommu->cmd_sem) 997 return -ENOMEM; 998 iommu->cmd_sem_paddr = iommu_virt_to_phys((void *)iommu->cmd_sem); 999 return 0; 1000} 1001 1002static int __init remap_event_buffer(void) 1003{ 1004 struct amd_iommu *iommu; 1005 u64 paddr; 1006 1007 pr_info_once("Re-using event buffer from the previous kernel\n"); 1008 for_each_iommu(iommu) { 1009 paddr = readq(iommu->mmio_base + MMIO_EVT_BUF_OFFSET) & PM_ADDR_MASK; 1010 iommu->evt_buf = iommu_memremap(paddr, amd_iommu_evtlog_size); 1011 if (!iommu->evt_buf) 1012 return -ENOMEM; 1013 } 1014 1015 return 0; 1016} 1017 1018static int __init remap_command_buffer(struct amd_iommu *iommu) 1019{ 1020 u64 paddr; 1021 1022 pr_info_once("Re-using command buffer from the previous kernel\n"); 1023 paddr = readq(iommu->mmio_base + MMIO_CMD_BUF_OFFSET) & PM_ADDR_MASK; 1024 iommu->cmd_buf = iommu_memremap(paddr, CMD_BUFFER_SIZE); 1025 1026 return iommu->cmd_buf ? 
0 : -ENOMEM; 1027} 1028 1029static int __init remap_or_alloc_cwwb_sem(struct amd_iommu *iommu) 1030{ 1031 u64 paddr; 1032 1033 if (check_feature(FEATURE_SNP)) { 1034 /* 1035 * When SNP is enabled, the exclusion base register is used for the 1036 * completion wait buffer (CWB) address. Read and re-use it. 1037 */ 1038 pr_info_once("Re-using CWB buffers from the previous kernel\n"); 1039 paddr = readq(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET) & PM_ADDR_MASK; 1040 iommu->cmd_sem = iommu_memremap(paddr, PAGE_SIZE); 1041 if (!iommu->cmd_sem) 1042 return -ENOMEM; 1043 iommu->cmd_sem_paddr = paddr; 1044 } else { 1045 return alloc_cwwb_sem(iommu); 1046 } 1047 1048 return 0; 1049} 1050 1051static int __init alloc_iommu_buffers(struct amd_iommu *iommu) 1052{ 1053 int ret; 1054 1055 /* 1056 * Reuse/Remap the previous kernel's allocated completion wait 1057 * command and event buffers for kdump boot. 1058 */ 1059 if (is_kdump_kernel()) { 1060 ret = remap_or_alloc_cwwb_sem(iommu); 1061 if (ret) 1062 return ret; 1063 1064 ret = remap_command_buffer(iommu); 1065 if (ret) 1066 return ret; 1067 } else { 1068 ret = alloc_cwwb_sem(iommu); 1069 if (ret) 1070 return ret; 1071 1072 ret = alloc_command_buffer(iommu); 1073 if (ret) 1074 return ret; 1075 } 1076 1077 return 0; 1078} 1079 1080static void __init free_cwwb_sem(struct amd_iommu *iommu) 1081{ 1082 if (iommu->cmd_sem) 1083 iommu_free_pages((void *)iommu->cmd_sem); 1084} 1085static void __init unmap_cwwb_sem(struct amd_iommu *iommu) 1086{ 1087 if (iommu->cmd_sem) { 1088 if (check_feature(FEATURE_SNP)) 1089 memunmap((void *)iommu->cmd_sem); 1090 else 1091 iommu_free_pages((void *)iommu->cmd_sem); 1092 } 1093} 1094 1095static void __init unmap_command_buffer(struct amd_iommu *iommu) 1096{ 1097 memunmap((void *)iommu->cmd_buf); 1098} 1099 1100static void __init unmap_event_buffer(struct amd_iommu *iommu) 1101{ 1102 memunmap(iommu->evt_buf); 1103} 1104 1105static void __init free_iommu_buffers(struct amd_iommu *iommu) 1106{ 1107 if (is_kdump_kernel()) { 1108 unmap_cwwb_sem(iommu); 1109 unmap_command_buffer(iommu); 1110 unmap_event_buffer(iommu); 1111 } else { 1112 free_cwwb_sem(iommu); 1113 free_command_buffer(iommu); 1114 free_event_buffer(iommu); 1115 } 1116} 1117 1118static void iommu_enable_xt(struct amd_iommu *iommu) 1119{ 1120#ifdef CONFIG_IRQ_REMAP 1121 /* 1122 * XT mode (32-bit APIC destination ID) requires 1123 * GA mode (128-bit IRTE support) as a prerequisite. 1124 */ 1125 if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) && 1126 amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 1127 iommu_feature_enable(iommu, CONTROL_XT_EN); 1128#endif /* CONFIG_IRQ_REMAP */ 1129} 1130 1131static void iommu_enable_gt(struct amd_iommu *iommu) 1132{ 1133 if (!check_feature(FEATURE_GT)) 1134 return; 1135 1136 iommu_feature_enable(iommu, CONTROL_GT_EN); 1137 1138 /* 1139 * This feature needs to be enabled prior to a call 1140 * to iommu_snp_enable(). Since this function is called 1141 * in early_enable_iommu(), it is safe to enable here. 1142 */ 1143 if (check_feature2(FEATURE_GCR3TRPMODE)) 1144 iommu_feature_enable(iommu, CONTROL_GCR3TRPMODE); 1145} 1146 1147/* sets a specific bit in the device table entry. 
*/ 1148static void set_dte_bit(struct dev_table_entry *dte, u8 bit) 1149{ 1150 int i = (bit >> 6) & 0x03; 1151 int _bit = bit & 0x3f; 1152 1153 dte->data[i] |= (1UL << _bit); 1154} 1155 1156static bool __reuse_device_table(struct amd_iommu *iommu) 1157{ 1158 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; 1159 struct dev_table_entry *old_dev_tbl_entry; 1160 u32 lo, hi, old_devtb_size, devid; 1161 phys_addr_t old_devtb_phys; 1162 u16 dom_id; 1163 bool dte_v; 1164 u64 entry; 1165 1166 /* Each IOMMU use separate device table with the same size */ 1167 lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); 1168 hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); 1169 entry = (((u64) hi) << 32) + lo; 1170 1171 old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; 1172 if (old_devtb_size != pci_seg->dev_table_size) { 1173 pr_err("The device table size of IOMMU:%d is not expected!\n", 1174 iommu->index); 1175 return false; 1176 } 1177 1178 /* 1179 * When SME is enabled in the first kernel, the entry includes the 1180 * memory encryption mask(sme_me_mask), we must remove the memory 1181 * encryption mask to obtain the true physical address in kdump kernel. 1182 */ 1183 old_devtb_phys = __sme_clr(entry) & PAGE_MASK; 1184 1185 if (old_devtb_phys >= 0x100000000ULL) { 1186 pr_err("The address of old device table is above 4G, not trustworthy!\n"); 1187 return false; 1188 } 1189 1190 /* 1191 * Re-use the previous kernel's device table for kdump. 1192 */ 1193 pci_seg->old_dev_tbl_cpy = iommu_memremap(old_devtb_phys, pci_seg->dev_table_size); 1194 if (pci_seg->old_dev_tbl_cpy == NULL) { 1195 pr_err("Failed to remap memory for reusing old device table!\n"); 1196 return false; 1197 } 1198 1199 for (devid = 0; devid <= pci_seg->last_bdf; devid++) { 1200 old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid]; 1201 dte_v = FIELD_GET(DTE_FLAG_V, old_dev_tbl_entry->data[0]); 1202 dom_id = FIELD_GET(DTE_DOMID_MASK, old_dev_tbl_entry->data[1]); 1203 1204 if (!dte_v || !dom_id) 1205 continue; 1206 /* 1207 * ID reservation can fail with -ENOSPC when there 1208 * are multiple devices present in the same domain, 1209 * hence check only for -ENOMEM. 1210 */ 1211 if (amd_iommu_pdom_id_reserve(dom_id, GFP_KERNEL) == -ENOMEM) 1212 return false; 1213 } 1214 1215 return true; 1216} 1217 1218static bool reuse_device_table(void) 1219{ 1220 struct amd_iommu *iommu; 1221 struct amd_iommu_pci_seg *pci_seg; 1222 1223 if (!amd_iommu_pre_enabled) 1224 return false; 1225 1226 pr_warn("Translation is already enabled - trying to reuse translation structures\n"); 1227 1228 /* 1229 * All IOMMUs within PCI segment shares common device table. 1230 * Hence reuse device table only once per PCI segment. 1231 */ 1232 for_each_pci_segment(pci_seg) { 1233 for_each_iommu(iommu) { 1234 if (pci_seg->id != iommu->pci_seg->id) 1235 continue; 1236 if (!__reuse_device_table(iommu)) 1237 return false; 1238 break; 1239 } 1240 } 1241 1242 return true; 1243} 1244 1245struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid) 1246{ 1247 struct ivhd_dte_flags *e; 1248 unsigned int best_len = UINT_MAX; 1249 struct dev_table_entry *dte = NULL; 1250 1251 for_each_ivhd_dte_flags(e) { 1252 /* 1253 * Need to go through the whole list to find the smallest range, 1254 * which contains the devid. 
1255 */ 1256 if ((e->segid == segid) && 1257 (e->devid_first <= devid) && (devid <= e->devid_last)) { 1258 unsigned int len = e->devid_last - e->devid_first; 1259 1260 if (len < best_len) { 1261 dte = &(e->dte); 1262 best_len = len; 1263 } 1264 } 1265 } 1266 return dte; 1267} 1268 1269static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last) 1270{ 1271 struct ivhd_dte_flags *e; 1272 1273 for_each_ivhd_dte_flags(e) { 1274 if ((e->segid == segid) && 1275 (e->devid_first == first) && 1276 (e->devid_last == last)) 1277 return true; 1278 } 1279 return false; 1280} 1281 1282/* 1283 * This function takes the device specific flags read from the ACPI 1284 * table and sets up the device table entry with that information 1285 */ 1286static void __init 1287set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last, 1288 u32 flags, u32 ext_flags) 1289{ 1290 int i; 1291 struct dev_table_entry dte = {}; 1292 1293 /* Parse IVHD DTE setting flags and store information */ 1294 if (flags) { 1295 struct ivhd_dte_flags *d; 1296 1297 if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last)) 1298 return; 1299 1300 d = kzalloc_obj(struct ivhd_dte_flags); 1301 if (!d) 1302 return; 1303 1304 pr_debug("%s: devid range %#x:%#x\n", __func__, first, last); 1305 1306 if (flags & ACPI_DEVFLAG_INITPASS) 1307 set_dte_bit(&dte, DEV_ENTRY_INIT_PASS); 1308 if (flags & ACPI_DEVFLAG_EXTINT) 1309 set_dte_bit(&dte, DEV_ENTRY_EINT_PASS); 1310 if (flags & ACPI_DEVFLAG_NMI) 1311 set_dte_bit(&dte, DEV_ENTRY_NMI_PASS); 1312 if (flags & ACPI_DEVFLAG_SYSMGT1) 1313 set_dte_bit(&dte, DEV_ENTRY_SYSMGT1); 1314 if (flags & ACPI_DEVFLAG_SYSMGT2) 1315 set_dte_bit(&dte, DEV_ENTRY_SYSMGT2); 1316 if (flags & ACPI_DEVFLAG_LINT0) 1317 set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS); 1318 if (flags & ACPI_DEVFLAG_LINT1) 1319 set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS); 1320 1321 /* Apply erratum 63, which needs info in initial_dte */ 1322 if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1) 1323 dte.data[0] |= DTE_FLAG_IW; 1324 1325 memcpy(&d->dte, &dte, sizeof(dte)); 1326 d->segid = iommu->pci_seg->id; 1327 d->devid_first = first; 1328 d->devid_last = last; 1329 list_add_tail(&d->list, &amd_ivhd_dev_flags_list); 1330 } 1331 1332 for (i = first; i <= last; i++) { 1333 if (flags) { 1334 struct dev_table_entry *dev_table = get_dev_table(iommu); 1335 1336 memcpy(&dev_table[i], &dte, sizeof(dte)); 1337 } 1338 amd_iommu_set_rlookup_table(iommu, i); 1339 } 1340} 1341 1342static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, 1343 u16 devid, u32 flags, u32 ext_flags) 1344{ 1345 set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags); 1346} 1347 1348int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) 1349{ 1350 struct devid_map *entry; 1351 struct list_head *list; 1352 1353 if (type == IVHD_SPECIAL_IOAPIC) 1354 list = &ioapic_map; 1355 else if (type == IVHD_SPECIAL_HPET) 1356 list = &hpet_map; 1357 else 1358 return -EINVAL; 1359 1360 list_for_each_entry(entry, list, list) { 1361 if (!(entry->id == id && entry->cmd_line)) 1362 continue; 1363 1364 pr_info("Command-line override present for %s id %d - ignoring\n", 1365 type == IVHD_SPECIAL_IOAPIC ? 
"IOAPIC" : "HPET", id); 1366 1367 *devid = entry->devid; 1368 1369 return 0; 1370 } 1371 1372 entry = kzalloc_obj(*entry); 1373 if (!entry) 1374 return -ENOMEM; 1375 1376 entry->id = id; 1377 entry->devid = *devid; 1378 entry->cmd_line = cmd_line; 1379 1380 list_add_tail(&entry->list, list); 1381 1382 return 0; 1383} 1384 1385static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, 1386 bool cmd_line) 1387{ 1388 struct acpihid_map_entry *entry; 1389 struct list_head *list = &acpihid_map; 1390 1391 list_for_each_entry(entry, list, list) { 1392 if (strcmp(entry->hid, hid) || 1393 (*uid && *entry->uid && strcmp(entry->uid, uid)) || 1394 !entry->cmd_line) 1395 continue; 1396 1397 pr_info("Command-line override for hid:%s uid:%s\n", 1398 hid, uid); 1399 *devid = entry->devid; 1400 return 0; 1401 } 1402 1403 entry = kzalloc_obj(*entry); 1404 if (!entry) 1405 return -ENOMEM; 1406 1407 memcpy(entry->uid, uid, strlen(uid)); 1408 memcpy(entry->hid, hid, strlen(hid)); 1409 entry->devid = *devid; 1410 entry->cmd_line = cmd_line; 1411 entry->root_devid = (entry->devid & (~0x7)); 1412 1413 pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n", 1414 entry->cmd_line ? "cmd" : "ivrs", 1415 entry->hid, entry->uid, entry->root_devid); 1416 1417 list_add_tail(&entry->list, list); 1418 return 0; 1419} 1420 1421static int __init add_early_maps(void) 1422{ 1423 int i, ret; 1424 1425 for (i = 0; i < early_ioapic_map_size; ++i) { 1426 ret = add_special_device(IVHD_SPECIAL_IOAPIC, 1427 early_ioapic_map[i].id, 1428 &early_ioapic_map[i].devid, 1429 early_ioapic_map[i].cmd_line); 1430 if (ret) 1431 return ret; 1432 } 1433 1434 for (i = 0; i < early_hpet_map_size; ++i) { 1435 ret = add_special_device(IVHD_SPECIAL_HPET, 1436 early_hpet_map[i].id, 1437 &early_hpet_map[i].devid, 1438 early_hpet_map[i].cmd_line); 1439 if (ret) 1440 return ret; 1441 } 1442 1443 for (i = 0; i < early_acpihid_map_size; ++i) { 1444 ret = add_acpi_hid_device(early_acpihid_map[i].hid, 1445 early_acpihid_map[i].uid, 1446 &early_acpihid_map[i].devid, 1447 early_acpihid_map[i].cmd_line); 1448 if (ret) 1449 return ret; 1450 } 1451 1452 return 0; 1453} 1454 1455/* 1456 * Takes a pointer to an AMD IOMMU entry in the ACPI table and 1457 * initializes the hardware and our data structures with it. 1458 */ 1459static int __init init_iommu_from_acpi(struct amd_iommu *iommu, 1460 struct ivhd_header *h) 1461{ 1462 u8 *p = (u8 *)h; 1463 u8 *end = p, flags = 0; 1464 u16 devid = 0, devid_start = 0, devid_to = 0, seg_id; 1465 u32 dev_i, ext_flags = 0; 1466 bool alias = false; 1467 struct ivhd_entry *e; 1468 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; 1469 u32 ivhd_size; 1470 int ret; 1471 1472 1473 ret = add_early_maps(); 1474 if (ret) 1475 return ret; 1476 1477 amd_iommu_apply_ivrs_quirks(); 1478 1479 /* 1480 * First save the recommended feature enable bits from ACPI 1481 */ 1482 iommu->acpi_flags = h->flags; 1483 1484 /* 1485 * Done. 
Now parse the device entries 1486 */ 1487 ivhd_size = get_ivhd_header_size(h); 1488 if (!ivhd_size) { 1489 pr_err("Unsupported IVHD type %#x\n", h->type); 1490 return -EINVAL; 1491 } 1492 1493 p += ivhd_size; 1494 1495 end += h->length; 1496 1497 1498 while (p < end) { 1499 e = (struct ivhd_entry *)p; 1500 seg_id = pci_seg->id; 1501 1502 switch (e->type) { 1503 case IVHD_DEV_ALL: 1504 1505 DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags); 1506 set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0); 1507 break; 1508 case IVHD_DEV_SELECT: 1509 1510 DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", 1511 seg_id, PCI_BUS_NUM(e->devid), 1512 PCI_SLOT(e->devid), 1513 PCI_FUNC(e->devid), 1514 e->flags); 1515 1516 devid = e->devid; 1517 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1518 break; 1519 case IVHD_DEV_SELECT_RANGE_START: 1520 1521 DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", 1522 seg_id, PCI_BUS_NUM(e->devid), 1523 PCI_SLOT(e->devid), 1524 PCI_FUNC(e->devid), 1525 e->flags); 1526 1527 devid_start = e->devid; 1528 flags = e->flags; 1529 ext_flags = 0; 1530 alias = false; 1531 break; 1532 case IVHD_DEV_ALIAS: 1533 1534 DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n", 1535 seg_id, PCI_BUS_NUM(e->devid), 1536 PCI_SLOT(e->devid), 1537 PCI_FUNC(e->devid), 1538 e->flags, 1539 PCI_BUS_NUM(e->ext >> 8), 1540 PCI_SLOT(e->ext >> 8), 1541 PCI_FUNC(e->ext >> 8)); 1542 1543 devid = e->devid; 1544 devid_to = e->ext >> 8; 1545 set_dev_entry_from_acpi(iommu, devid , e->flags, 0); 1546 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); 1547 pci_seg->alias_table[devid] = devid_to; 1548 break; 1549 case IVHD_DEV_ALIAS_RANGE: 1550 1551 DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n", 1552 seg_id, PCI_BUS_NUM(e->devid), 1553 PCI_SLOT(e->devid), 1554 PCI_FUNC(e->devid), 1555 e->flags, 1556 seg_id, PCI_BUS_NUM(e->ext >> 8), 1557 PCI_SLOT(e->ext >> 8), 1558 PCI_FUNC(e->ext >> 8)); 1559 1560 devid_start = e->devid; 1561 flags = e->flags; 1562 devid_to = e->ext >> 8; 1563 ext_flags = 0; 1564 alias = true; 1565 break; 1566 case IVHD_DEV_EXT_SELECT: 1567 1568 DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", 1569 seg_id, PCI_BUS_NUM(e->devid), 1570 PCI_SLOT(e->devid), 1571 PCI_FUNC(e->devid), 1572 e->flags, e->ext); 1573 1574 devid = e->devid; 1575 set_dev_entry_from_acpi(iommu, devid, e->flags, 1576 e->ext); 1577 break; 1578 case IVHD_DEV_EXT_SELECT_RANGE: 1579 1580 DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", 1581 seg_id, PCI_BUS_NUM(e->devid), 1582 PCI_SLOT(e->devid), 1583 PCI_FUNC(e->devid), 1584 e->flags, e->ext); 1585 1586 devid_start = e->devid; 1587 flags = e->flags; 1588 ext_flags = e->ext; 1589 alias = false; 1590 break; 1591 case IVHD_DEV_RANGE_END: 1592 1593 DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n", 1594 seg_id, PCI_BUS_NUM(e->devid), 1595 PCI_SLOT(e->devid), 1596 PCI_FUNC(e->devid)); 1597 1598 devid = e->devid; 1599 if (alias) { 1600 for (dev_i = devid_start; dev_i <= devid; ++dev_i) 1601 pci_seg->alias_table[dev_i] = devid_to; 1602 set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags); 1603 } 1604 set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags); 1605 break; 1606 case IVHD_DEV_SPECIAL: { 1607 u8 handle, type; 1608 const char *var; 1609 u32 devid; 1610 int ret; 1611 1612 handle = e->ext & 
0xff; 1613 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8)); 1614 type = (e->ext >> 24) & 0xff; 1615 1616 if (type == IVHD_SPECIAL_IOAPIC) 1617 var = "IOAPIC"; 1618 else if (type == IVHD_SPECIAL_HPET) 1619 var = "HPET"; 1620 else 1621 var = "UNKNOWN"; 1622 1623 DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", 1624 var, (int)handle, 1625 seg_id, PCI_BUS_NUM(devid), 1626 PCI_SLOT(devid), 1627 PCI_FUNC(devid), 1628 e->flags); 1629 1630 ret = add_special_device(type, handle, &devid, false); 1631 if (ret) 1632 return ret; 1633 1634 /* 1635 * add_special_device might update the devid in case a 1636 * command-line override is present. So call 1637 * set_dev_entry_from_acpi after add_special_device. 1638 */ 1639 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1640 1641 break; 1642 } 1643 case IVHD_DEV_ACPI_HID: { 1644 u32 devid; 1645 u8 hid[ACPIHID_HID_LEN]; 1646 u8 uid[ACPIHID_UID_LEN]; 1647 int ret; 1648 1649 if (h->type != 0x40) { 1650 pr_err(FW_BUG "Invalid IVHD device type %#x\n", 1651 e->type); 1652 break; 1653 } 1654 1655 BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1); 1656 memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1); 1657 hid[ACPIHID_HID_LEN - 1] = '\0'; 1658 1659 if (!(*hid)) { 1660 pr_err(FW_BUG "Invalid HID.\n"); 1661 break; 1662 } 1663 1664 uid[0] = '\0'; 1665 switch (e->uidf) { 1666 case UID_NOT_PRESENT: 1667 1668 if (e->uidl != 0) 1669 pr_warn(FW_BUG "Invalid UID length.\n"); 1670 1671 break; 1672 case UID_IS_INTEGER: 1673 1674 sprintf(uid, "%d", e->uid); 1675 1676 break; 1677 case UID_IS_CHARACTER: 1678 1679 memcpy(uid, &e->uid, e->uidl); 1680 uid[e->uidl] = '\0'; 1681 1682 break; 1683 default: 1684 break; 1685 } 1686 1687 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); 1688 DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", 1689 hid, uid, seg_id, 1690 PCI_BUS_NUM(devid), 1691 PCI_SLOT(devid), 1692 PCI_FUNC(devid), 1693 e->flags); 1694 1695 flags = e->flags; 1696 1697 ret = add_acpi_hid_device(hid, uid, &devid, false); 1698 if (ret) 1699 return ret; 1700 1701 /* 1702 * add_special_device might update the devid in case a 1703 * command-line override is present. So call 1704 * set_dev_entry_from_acpi after add_special_device. 1705 */ 1706 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1707 1708 break; 1709 } 1710 default: 1711 break; 1712 } 1713 1714 p += ivhd_entry_length(p); 1715 } 1716 1717 return 0; 1718} 1719 1720/* Allocate PCI segment data structure */ 1721static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, 1722 struct acpi_table_header *ivrs_base) 1723{ 1724 struct amd_iommu_pci_seg *pci_seg; 1725 int last_bdf; 1726 1727 /* 1728 * First parse ACPI tables to find the largest Bus/Dev/Func we need to 1729 * handle in this PCI segment. Upon this information the shared data 1730 * structures for the PCI segments in the system will be allocated. 
1731 */ 1732 last_bdf = find_last_devid_acpi(ivrs_base, id); 1733 if (last_bdf < 0) 1734 return NULL; 1735 1736 pci_seg = kzalloc_obj(struct amd_iommu_pci_seg); 1737 if (pci_seg == NULL) 1738 return NULL; 1739 1740 pci_seg->last_bdf = last_bdf; 1741 DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); 1742 pci_seg->dev_table_size = 1743 max(roundup_pow_of_two((last_bdf + 1) * DEV_TABLE_ENTRY_SIZE), 1744 SZ_4K); 1745 1746 pci_seg->id = id; 1747 init_llist_head(&pci_seg->dev_data_list); 1748 INIT_LIST_HEAD(&pci_seg->unity_map); 1749 list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); 1750 1751 if (alloc_dev_table(pci_seg)) 1752 goto err_free_pci_seg; 1753 if (alloc_alias_table(pci_seg)) 1754 goto err_free_dev_table; 1755 if (alloc_rlookup_table(pci_seg)) 1756 goto err_free_alias_table; 1757 1758 return pci_seg; 1759 1760err_free_alias_table: 1761 free_alias_table(pci_seg); 1762err_free_dev_table: 1763 free_dev_table(pci_seg); 1764err_free_pci_seg: 1765 list_del(&pci_seg->list); 1766 kfree(pci_seg); 1767 return NULL; 1768} 1769 1770static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id, 1771 struct acpi_table_header *ivrs_base) 1772{ 1773 struct amd_iommu_pci_seg *pci_seg; 1774 1775 for_each_pci_segment(pci_seg) { 1776 if (pci_seg->id == id) 1777 return pci_seg; 1778 } 1779 1780 return alloc_pci_segment(id, ivrs_base); 1781} 1782 1783static void __init free_pci_segments(void) 1784{ 1785 struct amd_iommu_pci_seg *pci_seg, *next; 1786 1787 for_each_pci_segment_safe(pci_seg, next) { 1788 list_del(&pci_seg->list); 1789 free_irq_lookup_table(pci_seg); 1790 free_rlookup_table(pci_seg); 1791 free_alias_table(pci_seg); 1792 free_dev_table(pci_seg); 1793 kfree(pci_seg); 1794 } 1795} 1796 1797static void __init free_sysfs(struct amd_iommu *iommu) 1798{ 1799 if (iommu->iommu.dev) { 1800 iommu_device_unregister(&iommu->iommu); 1801 iommu_device_sysfs_remove(&iommu->iommu); 1802 } 1803} 1804 1805static void __init free_iommu_one(struct amd_iommu *iommu) 1806{ 1807 free_sysfs(iommu); 1808 free_iommu_buffers(iommu); 1809 amd_iommu_free_ppr_log(iommu); 1810 free_ga_log(iommu); 1811 iommu_unmap_mmio_space(iommu); 1812 amd_iommu_iopf_uninit(iommu); 1813} 1814 1815static void __init free_iommu_all(void) 1816{ 1817 struct amd_iommu *iommu, *next; 1818 1819 for_each_iommu_safe(iommu, next) { 1820 list_del(&iommu->list); 1821 free_iommu_one(iommu); 1822 kfree(iommu); 1823 } 1824} 1825 1826/* 1827 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations) 1828 * Workaround: 1829 * BIOS should disable L2B micellaneous clock gating by setting 1830 * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b 1831 */ 1832static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) 1833{ 1834 u32 value; 1835 1836 if ((boot_cpu_data.x86 != 0x15) || 1837 (boot_cpu_data.x86_model < 0x10) || 1838 (boot_cpu_data.x86_model > 0x1f)) 1839 return; 1840 1841 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1842 pci_read_config_dword(iommu->dev, 0xf4, &value); 1843 1844 if (value & BIT(2)) 1845 return; 1846 1847 /* Select NB indirect register 0x90 and enable writing */ 1848 pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); 1849 1850 pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); 1851 pci_info(iommu->dev, "Applying erratum 746 workaround\n"); 1852 1853 /* Clear the enable writing bit */ 1854 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1855} 1856 1857/* 1858 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission) 1859 * Workaround: 1860 * 
BIOS should enable ATS write permission check by setting 1861 * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b 1862 */ 1863static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) 1864{ 1865 u32 value; 1866 1867 if ((boot_cpu_data.x86 != 0x15) || 1868 (boot_cpu_data.x86_model < 0x30) || 1869 (boot_cpu_data.x86_model > 0x3f)) 1870 return; 1871 1872 /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */ 1873 value = iommu_read_l2(iommu, 0x47); 1874 1875 if (value & BIT(0)) 1876 return; 1877 1878 /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ 1879 iommu_write_l2(iommu, 0x47, value | BIT(0)); 1880 1881 pci_info(iommu->dev, "Applying ATS write check workaround\n"); 1882} 1883 1884/* 1885 * This function glues the initialization function for one IOMMU 1886 * together and also allocates the command buffer and programs the 1887 * hardware. It does NOT enable the IOMMU. This is done afterwards. 1888 */ 1889static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, 1890 struct acpi_table_header *ivrs_base) 1891{ 1892 struct amd_iommu_pci_seg *pci_seg; 1893 1894 pci_seg = get_pci_segment(h->pci_seg, ivrs_base); 1895 if (pci_seg == NULL) 1896 return -ENOMEM; 1897 iommu->pci_seg = pci_seg; 1898 1899 raw_spin_lock_init(&iommu->lock); 1900 iommu->cmd_sem_val = 0; 1901 1902 /* Add IOMMU to internal data structures */ 1903 list_add_tail(&iommu->list, &amd_iommu_list); 1904 iommu->index = amd_iommus_present++; 1905 1906 if (unlikely(iommu->index >= MAX_IOMMUS)) { 1907 WARN(1, "System has more IOMMUs than supported by this driver\n"); 1908 return -ENOSYS; 1909 } 1910 1911 /* 1912 * Copy data from ACPI table entry to the iommu struct 1913 */ 1914 iommu->devid = h->devid; 1915 iommu->cap_ptr = h->cap_ptr; 1916 iommu->mmio_phys = h->mmio_phys; 1917 1918 switch (h->type) { 1919 case 0x10: 1920 /* Check if IVHD EFR contains proper max banks/counters */ 1921 if ((h->efr_attr != 0) && 1922 ((h->efr_attr & (0xF << 13)) != 0) && 1923 ((h->efr_attr & (0x3F << 17)) != 0)) 1924 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1925 else 1926 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1927 1928 /* GAM requires GA mode. */ 1929 if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0) 1930 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1931 break; 1932 case 0x11: 1933 case 0x40: 1934 if (h->efr_reg & (1 << 9)) 1935 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1936 else 1937 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1938 1939 /* XT and GAM require GA mode. 
*/ 1940 if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) { 1941 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1942 break; 1943 } 1944 1945 if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) 1946 amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; 1947 1948 if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) { 1949 pr_warn_once("Host Address Translation is not supported.\n"); 1950 amd_iommu_hatdis = true; 1951 } 1952 1953 early_iommu_features_init(iommu, h); 1954 1955 break; 1956 default: 1957 return -EINVAL; 1958 } 1959 1960 iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, 1961 iommu->mmio_phys_end); 1962 if (!iommu->mmio_base) 1963 return -ENOMEM; 1964 1965 return init_iommu_from_acpi(iommu, h); 1966} 1967 1968static int __init init_iommu_one_late(struct amd_iommu *iommu) 1969{ 1970 int ret; 1971 1972 ret = alloc_iommu_buffers(iommu); 1973 if (ret) 1974 return ret; 1975 1976 iommu->int_enabled = false; 1977 1978 init_translation_status(iommu); 1979 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { 1980 iommu_disable(iommu); 1981 clear_translation_pre_enabled(iommu); 1982 pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n", 1983 iommu->index); 1984 } 1985 if (amd_iommu_pre_enabled) 1986 amd_iommu_pre_enabled = translation_pre_enabled(iommu); 1987 1988 if (amd_iommu_irq_remap) { 1989 ret = amd_iommu_create_irq_domain(iommu); 1990 if (ret) 1991 return ret; 1992 } 1993 1994 /* 1995 * Make sure IOMMU is not considered to translate itself. The IVRS 1996 * table tells us so, but this is a lie! 1997 */ 1998 iommu->pci_seg->rlookup_table[iommu->devid] = NULL; 1999 2000 return 0; 2001} 2002 2003/** 2004 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type 2005 * @ivrs: Pointer to the IVRS header 2006 * 2007 * This function search through all IVDB of the maximum supported IVHD 2008 */ 2009static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs) 2010{ 2011 u8 *base = (u8 *)ivrs; 2012 struct ivhd_header *ivhd = (struct ivhd_header *) 2013 (base + IVRS_HEADER_LENGTH); 2014 u8 last_type = ivhd->type; 2015 u16 devid = ivhd->devid; 2016 2017 while (((u8 *)ivhd - base < ivrs->length) && 2018 (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) { 2019 u8 *p = (u8 *) ivhd; 2020 2021 if (ivhd->devid == devid) 2022 last_type = ivhd->type; 2023 ivhd = (struct ivhd_header *)(p + ivhd->length); 2024 } 2025 2026 return last_type; 2027} 2028 2029/* 2030 * Iterates over all IOMMU entries in the ACPI table, allocates the 2031 * IOMMU structure and initializes it with init_iommu_one() 2032 */ 2033static int __init init_iommu_all(struct acpi_table_header *table) 2034{ 2035 u8 *p = (u8 *)table, *end = (u8 *)table; 2036 struct ivhd_header *h; 2037 struct amd_iommu *iommu; 2038 int ret; 2039 2040 end += table->length; 2041 p += IVRS_HEADER_LENGTH; 2042 2043 /* Phase 1: Process all IVHD blocks */ 2044 while (p < end) { 2045 h = (struct ivhd_header *)p; 2046 if (*p == amd_iommu_target_ivhd_type) { 2047 2048 DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x " 2049 "flags: %01x info %04x\n", 2050 h->pci_seg, PCI_BUS_NUM(h->devid), 2051 PCI_SLOT(h->devid), PCI_FUNC(h->devid), 2052 h->cap_ptr, h->flags, h->info); 2053 DUMP_printk(" mmio-addr: %016llx\n", 2054 h->mmio_phys); 2055 2056 iommu = kzalloc_obj(struct amd_iommu); 2057 if (iommu == NULL) 2058 return -ENOMEM; 2059 2060 ret = init_iommu_one(iommu, h, table); 2061 if (ret) 2062 return ret; 2063 } 2064 p += h->length; 2065 2066 } 2067 WARN_ON(p != end); 2068 2069 /* Phase 2 : Early feature support check */ 
2070 get_global_efr(); 2071 2072 /* Phase 3 : Enabling IOMMU features */ 2073 for_each_iommu(iommu) { 2074 ret = init_iommu_one_late(iommu); 2075 if (ret) 2076 return ret; 2077 } 2078 2079 return 0; 2080} 2081 2082static void init_iommu_perf_ctr(struct amd_iommu *iommu) 2083{ 2084 u64 val; 2085 struct pci_dev *pdev = iommu->dev; 2086 2087 if (!check_feature(FEATURE_PC)) 2088 return; 2089 2090 amd_iommu_pc_present = true; 2091 2092 pci_info(pdev, "IOMMU performance counters supported\n"); 2093 2094 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); 2095 iommu->max_banks = (u8) ((val >> 12) & 0x3f); 2096 iommu->max_counters = (u8) ((val >> 7) & 0xf); 2097 2098 return; 2099} 2100 2101static ssize_t amd_iommu_show_cap(struct device *dev, 2102 struct device_attribute *attr, 2103 char *buf) 2104{ 2105 struct amd_iommu *iommu = dev_to_amd_iommu(dev); 2106 return sysfs_emit(buf, "%x\n", iommu->cap); 2107} 2108static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); 2109 2110static ssize_t amd_iommu_show_features(struct device *dev, 2111 struct device_attribute *attr, 2112 char *buf) 2113{ 2114 return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2); 2115} 2116static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); 2117 2118static struct attribute *amd_iommu_attrs[] = { 2119 &dev_attr_cap.attr, 2120 &dev_attr_features.attr, 2121 NULL, 2122}; 2123 2124static struct attribute_group amd_iommu_group = { 2125 .name = "amd-iommu", 2126 .attrs = amd_iommu_attrs, 2127}; 2128 2129static const struct attribute_group *amd_iommu_groups[] = { 2130 &amd_iommu_group, 2131 NULL, 2132}; 2133 2134/* 2135 * Note: IVHD 0x11 and 0x40 also contains exact copy 2136 * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. 2137 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init). 2138 */ 2139static void __init late_iommu_features_init(struct amd_iommu *iommu) 2140{ 2141 u64 features, features2; 2142 2143 if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) 2144 return; 2145 2146 /* read extended feature bits */ 2147 features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); 2148 features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); 2149 2150 if (!amd_iommu_efr) { 2151 amd_iommu_efr = features; 2152 amd_iommu_efr2 = features2; 2153 return; 2154 } 2155 2156 /* 2157 * Sanity check and warn if EFR values from 2158 * IVHD and MMIO conflict. 2159 */ 2160 if (features != amd_iommu_efr || 2161 features2 != amd_iommu_efr2) { 2162 pr_warn(FW_WARN 2163 "EFR mismatch. 
Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", 2164 features, amd_iommu_efr, 2165 features2, amd_iommu_efr2); 2166 } 2167} 2168 2169static int __init iommu_init_pci(struct amd_iommu *iommu) 2170{ 2171 int cap_ptr = iommu->cap_ptr; 2172 int ret; 2173 2174 iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2175 PCI_BUS_NUM(iommu->devid), 2176 iommu->devid & 0xff); 2177 if (!iommu->dev) 2178 return -ENODEV; 2179 2180 /* ACPI _PRT won't have an IRQ for IOMMU */ 2181 iommu->dev->irq_managed = 1; 2182 2183 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 2184 &iommu->cap); 2185 2186 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) 2187 amd_iommu_iotlb_sup = false; 2188 2189 late_iommu_features_init(iommu); 2190 2191 if (check_feature(FEATURE_GT)) { 2192 int glxval; 2193 u64 pasmax; 2194 2195 pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr); 2196 iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1; 2197 2198 BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK); 2199 2200 glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr); 2201 2202 if (amd_iommu_max_glx_val == -1) 2203 amd_iommu_max_glx_val = glxval; 2204 else 2205 amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); 2206 2207 iommu_enable_gt(iommu); 2208 } 2209 2210 if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu)) 2211 return -ENOMEM; 2212 2213 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { 2214 pr_info("Using strict mode due to virtualization\n"); 2215 iommu_set_dma_strict(); 2216 amd_iommu_np_cache = true; 2217 } 2218 2219 init_iommu_perf_ctr(iommu); 2220 2221 if (is_rd890_iommu(iommu->dev)) { 2222 int i, j; 2223 2224 iommu->root_pdev = 2225 pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2226 iommu->dev->bus->number, 2227 PCI_DEVFN(0, 0)); 2228 2229 /* 2230 * Some rd890 systems may not be fully reconfigured by the 2231 * BIOS, so it's necessary for us to store this information so 2232 * it can be reprogrammed on resume 2233 */ 2234 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, 2235 &iommu->stored_addr_lo); 2236 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, 2237 &iommu->stored_addr_hi); 2238 2239 /* Low bit locks writes to configuration space */ 2240 iommu->stored_addr_lo &= ~1; 2241 2242 for (i = 0; i < 6; i++) 2243 for (j = 0; j < 0x12; j++) 2244 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); 2245 2246 for (i = 0; i < 0x83; i++) 2247 iommu->stored_l2[i] = iommu_read_l2(iommu, i); 2248 } 2249 2250 amd_iommu_erratum_746_workaround(iommu); 2251 amd_iommu_ats_write_check_workaround(iommu); 2252 2253 ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, 2254 amd_iommu_groups, "ivhd%d", iommu->index); 2255 if (ret) 2256 return ret; 2257 2258 /* 2259 * Allocate per IOMMU IOPF queue here so that in attach device path, 2260 * PRI capable device can be added to IOPF queue 2261 */ 2262 if (amd_iommu_gt_ppr_supported()) { 2263 ret = amd_iommu_iopf_init(iommu); 2264 if (ret) 2265 return ret; 2266 } 2267 2268 ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); 2269 if (ret || amd_iommu_pgtable == PD_MODE_NONE) { 2270 /* 2271 * Remove sysfs if DMA translation is not supported by the 2272 * IOMMU. Do not return an error to enable IRQ remapping 2273 * in state_next(), DTE[V, TV] must eventually be set to 0. 
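		 * (PD_MODE_NONE means DMA remapping is not available on this
		 * system; the IOMMU is then kept only for interrupt remapping.)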
2274 */ 2275 iommu_device_sysfs_remove(&iommu->iommu); 2276 } 2277 2278 return pci_enable_device(iommu->dev); 2279} 2280 2281static void print_iommu_info(void) 2282{ 2283 int i; 2284 static const char * const feat_str[] = { 2285 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", 2286 "IA", "GA", "HE", "PC" 2287 }; 2288 2289 if (amd_iommu_efr) { 2290 pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2); 2291 2292 for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { 2293 if (check_feature(1ULL << i)) 2294 pr_cont(" %s", feat_str[i]); 2295 } 2296 2297 if (check_feature(FEATURE_GAM_VAPIC)) 2298 pr_cont(" GA_vAPIC"); 2299 2300 if (check_feature(FEATURE_SNP)) 2301 pr_cont(" SNP"); 2302 2303 if (check_feature2(FEATURE_SEVSNPIO_SUP)) 2304 pr_cont(" SEV-TIO"); 2305 2306 pr_cont("\n"); 2307 } 2308 2309 if (irq_remapping_enabled) { 2310 pr_info("Interrupt remapping enabled\n"); 2311 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2312 pr_info("X2APIC enabled\n"); 2313 } 2314 if (amd_iommu_pgtable == PD_MODE_V2) { 2315 pr_info("V2 page table enabled (Paging mode : %d level)\n", 2316 amd_iommu_gpt_level); 2317 } 2318} 2319 2320static int __init amd_iommu_init_pci(void) 2321{ 2322 struct amd_iommu *iommu; 2323 struct amd_iommu_pci_seg *pci_seg; 2324 int ret; 2325 2326 /* Init global identity domain before registering IOMMU */ 2327 amd_iommu_init_identity_domain(); 2328 2329 for_each_iommu(iommu) { 2330 ret = iommu_init_pci(iommu); 2331 if (ret) { 2332 pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n", 2333 iommu->index, ret); 2334 goto out; 2335 } 2336 /* Need to setup range after PCI init */ 2337 iommu_set_cwwb_range(iommu); 2338 } 2339 2340 /* 2341 * Order is important here to make sure any unity map requirements are 2342 * fulfilled. The unity mappings are created and written to the device 2343 * table during the iommu_init_pci() call. 2344 * 2345 * After that we call init_device_table_dma() to make sure any 2346 * uninitialized DTE will block DMA, and in the end we flush the caches 2347 * of all IOMMUs to make sure the changes to the device table are 2348 * active. 2349 */ 2350 for_each_pci_segment(pci_seg) 2351 init_device_table_dma(pci_seg); 2352 2353 for_each_iommu(iommu) 2354 amd_iommu_flush_all_caches(iommu); 2355 2356 print_iommu_info(); 2357 2358out: 2359 return ret; 2360} 2361 2362/**************************************************************************** 2363 * 2364 * The following functions initialize the MSI interrupts for all IOMMUs 2365 * in the system. It's a bit challenging because there could be multiple 2366 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per 2367 * pci_dev. 
2368 * 2369 ****************************************************************************/ 2370 2371static int iommu_setup_msi(struct amd_iommu *iommu) 2372{ 2373 int r; 2374 2375 r = pci_enable_msi(iommu->dev); 2376 if (r) 2377 return r; 2378 2379 r = request_threaded_irq(iommu->dev->irq, NULL, amd_iommu_int_thread, 2380 IRQF_ONESHOT, "AMD-Vi", iommu); 2381 if (r) { 2382 pci_disable_msi(iommu->dev); 2383 return r; 2384 } 2385 2386 return 0; 2387} 2388 2389union intcapxt { 2390 u64 capxt; 2391 struct { 2392 u64 reserved_0 : 2, 2393 dest_mode_logical : 1, 2394 reserved_1 : 5, 2395 destid_0_23 : 24, 2396 vector : 8, 2397 reserved_2 : 16, 2398 destid_24_31 : 8; 2399 }; 2400} __attribute__ ((packed)); 2401 2402 2403static struct irq_chip intcapxt_controller; 2404 2405static int intcapxt_irqdomain_activate(struct irq_domain *domain, 2406 struct irq_data *irqd, bool reserve) 2407{ 2408 return 0; 2409} 2410 2411static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, 2412 struct irq_data *irqd) 2413{ 2414} 2415 2416 2417static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, 2418 unsigned int nr_irqs, void *arg) 2419{ 2420 struct irq_alloc_info *info = arg; 2421 int i, ret; 2422 2423 if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI) 2424 return -EINVAL; 2425 2426 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 2427 if (ret < 0) 2428 return ret; 2429 2430 for (i = virq; i < virq + nr_irqs; i++) { 2431 struct irq_data *irqd = irq_domain_get_irq_data(domain, i); 2432 2433 irqd->chip = &intcapxt_controller; 2434 irqd->hwirq = info->hwirq; 2435 irqd->chip_data = info->data; 2436 __irq_set_handler(i, handle_edge_irq, 0, "edge"); 2437 } 2438 2439 return ret; 2440} 2441 2442static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq, 2443 unsigned int nr_irqs) 2444{ 2445 irq_domain_free_irqs_top(domain, virq, nr_irqs); 2446} 2447 2448 2449static void intcapxt_unmask_irq(struct irq_data *irqd) 2450{ 2451 struct amd_iommu *iommu = irqd->chip_data; 2452 struct irq_cfg *cfg = irqd_cfg(irqd); 2453 union intcapxt xt; 2454 2455 xt.capxt = 0ULL; 2456 xt.dest_mode_logical = apic->dest_mode_logical; 2457 xt.vector = cfg->vector; 2458 xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); 2459 xt.destid_24_31 = cfg->dest_apicid >> 24; 2460 2461 writeq(xt.capxt, iommu->mmio_base + irqd->hwirq); 2462} 2463 2464static void intcapxt_mask_irq(struct irq_data *irqd) 2465{ 2466 struct amd_iommu *iommu = irqd->chip_data; 2467 2468 writeq(0, iommu->mmio_base + irqd->hwirq); 2469} 2470 2471 2472static int intcapxt_set_affinity(struct irq_data *irqd, 2473 const struct cpumask *mask, bool force) 2474{ 2475 struct irq_data *parent = irqd->parent_data; 2476 int ret; 2477 2478 ret = parent->chip->irq_set_affinity(parent, mask, force); 2479 if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 2480 return ret; 2481 return 0; 2482} 2483 2484static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on) 2485{ 2486 return on ? 
-EOPNOTSUPP : 0; 2487} 2488 2489static struct irq_chip intcapxt_controller = { 2490 .name = "IOMMU-MSI", 2491 .irq_unmask = intcapxt_unmask_irq, 2492 .irq_mask = intcapxt_mask_irq, 2493 .irq_ack = irq_chip_ack_parent, 2494 .irq_retrigger = irq_chip_retrigger_hierarchy, 2495 .irq_set_affinity = intcapxt_set_affinity, 2496 .irq_set_wake = intcapxt_set_wake, 2497 .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_MOVE_DEFERRED, 2498}; 2499 2500static const struct irq_domain_ops intcapxt_domain_ops = { 2501 .alloc = intcapxt_irqdomain_alloc, 2502 .free = intcapxt_irqdomain_free, 2503 .activate = intcapxt_irqdomain_activate, 2504 .deactivate = intcapxt_irqdomain_deactivate, 2505}; 2506 2507 2508static struct irq_domain *iommu_irqdomain; 2509 2510static struct irq_domain *iommu_get_irqdomain(void) 2511{ 2512 struct fwnode_handle *fn; 2513 2514 /* No need for locking here (yet) as the init is single-threaded */ 2515 if (iommu_irqdomain) 2516 return iommu_irqdomain; 2517 2518 fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI"); 2519 if (!fn) 2520 return NULL; 2521 2522 iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, 2523 fn, &intcapxt_domain_ops, 2524 NULL); 2525 if (!iommu_irqdomain) 2526 irq_domain_free_fwnode(fn); 2527 2528 return iommu_irqdomain; 2529} 2530 2531static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname, 2532 int hwirq, irq_handler_t thread_fn) 2533{ 2534 struct irq_domain *domain; 2535 struct irq_alloc_info info; 2536 int irq, ret; 2537 int node = dev_to_node(&iommu->dev->dev); 2538 2539 domain = iommu_get_irqdomain(); 2540 if (!domain) 2541 return -ENXIO; 2542 2543 init_irq_alloc_info(&info, NULL); 2544 info.type = X86_IRQ_ALLOC_TYPE_AMDVI; 2545 info.data = iommu; 2546 info.hwirq = hwirq; 2547 2548 irq = irq_domain_alloc_irqs(domain, 1, node, &info); 2549 if (irq < 0) { 2550 irq_domain_remove(domain); 2551 return irq; 2552 } 2553 2554 ret = request_threaded_irq(irq, NULL, thread_fn, IRQF_ONESHOT, devname, 2555 iommu); 2556 if (ret) { 2557 irq_domain_free_irqs(irq, 1); 2558 irq_domain_remove(domain); 2559 return ret; 2560 } 2561 2562 return 0; 2563} 2564 2565static int iommu_setup_intcapxt(struct amd_iommu *iommu) 2566{ 2567 int ret; 2568 2569 snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name), 2570 "AMD-Vi%d-Evt", iommu->index); 2571 ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name, 2572 MMIO_INTCAPXT_EVT_OFFSET, 2573 amd_iommu_int_thread_evtlog); 2574 if (ret) 2575 return ret; 2576 2577 snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name), 2578 "AMD-Vi%d-PPR", iommu->index); 2579 ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name, 2580 MMIO_INTCAPXT_PPR_OFFSET, 2581 amd_iommu_int_thread_pprlog); 2582 if (ret) 2583 return ret; 2584 2585#ifdef CONFIG_IRQ_REMAP 2586 snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name), 2587 "AMD-Vi%d-GA", iommu->index); 2588 ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name, 2589 MMIO_INTCAPXT_GALOG_OFFSET, 2590 amd_iommu_int_thread_galog); 2591#endif 2592 2593 return ret; 2594} 2595 2596static int iommu_init_irq(struct amd_iommu *iommu) 2597{ 2598 int ret; 2599 2600 if (iommu->int_enabled) 2601 goto enable_faults; 2602 2603 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2604 ret = iommu_setup_intcapxt(iommu); 2605 else if (iommu->dev->msi_cap) 2606 ret = iommu_setup_msi(iommu); 2607 else 2608 ret = -ENODEV; 2609 2610 if (ret) 2611 return ret; 2612 2613 iommu->int_enabled = true; 2614enable_faults: 2615 2616 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2617 
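		/* In x2APIC mode interrupts are generated via the IntCapXT registers */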
iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); 2618 2619 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 2620 2621 return 0; 2622} 2623 2624/**************************************************************************** 2625 * 2626 * The next functions belong to the third pass of parsing the ACPI 2627 * table. In this last pass the memory mapping requirements are 2628 * gathered (like exclusion and unity mapping ranges). 2629 * 2630 ****************************************************************************/ 2631 2632static void __init free_unity_maps(void) 2633{ 2634 struct unity_map_entry *entry, *next; 2635 struct amd_iommu_pci_seg *p, *pci_seg; 2636 2637 for_each_pci_segment_safe(pci_seg, p) { 2638 list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) { 2639 list_del(&entry->list); 2640 kfree(entry); 2641 } 2642 } 2643} 2644 2645/* called for unity map ACPI definition */ 2646static int __init init_unity_map_range(struct ivmd_header *m, 2647 struct acpi_table_header *ivrs_base) 2648{ 2649 struct unity_map_entry *e = NULL; 2650 struct amd_iommu_pci_seg *pci_seg; 2651 char *s; 2652 2653 pci_seg = get_pci_segment(m->pci_seg, ivrs_base); 2654 if (pci_seg == NULL) 2655 return -ENOMEM; 2656 2657 e = kzalloc_obj(*e); 2658 if (e == NULL) 2659 return -ENOMEM; 2660 2661 switch (m->type) { 2662 default: 2663 kfree(e); 2664 return 0; 2665 case ACPI_IVMD_TYPE: 2666 s = "IVMD_TYPE\t\t\t"; 2667 e->devid_start = e->devid_end = m->devid; 2668 break; 2669 case ACPI_IVMD_TYPE_ALL: 2670 s = "IVMD_TYPE_ALL\t\t"; 2671 e->devid_start = 0; 2672 e->devid_end = pci_seg->last_bdf; 2673 break; 2674 case ACPI_IVMD_TYPE_RANGE: 2675 s = "IVMD_TYPE_RANGE\t\t"; 2676 e->devid_start = m->devid; 2677 e->devid_end = m->aux; 2678 break; 2679 } 2680 e->address_start = PAGE_ALIGN(m->range_start); 2681 e->address_end = e->address_start + PAGE_ALIGN(m->range_length); 2682 e->prot = m->flags >> 1; 2683 2684 /* 2685 * Treat per-device exclusion ranges as r/w unity-mapped regions 2686 * since some buggy BIOSes may overwrite the exclusion range 2687 * (the exclusion_start and exclusion_length members). This 2688 * happens when multiple exclusion ranges (IVMD entries) are 2689 * defined in the ACPI table.
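	 * The IVMD_FLAG_EXCL_RANGE check below therefore forces read/write
	 * permissions instead of trusting the flags reported by the firmware.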
2690 */ 2691 if (m->flags & IVMD_FLAG_EXCL_RANGE) 2692 e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1; 2693 2694 DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: " 2695 "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx" 2696 " flags: %x\n", s, m->pci_seg, 2697 PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), 2698 PCI_FUNC(e->devid_start), m->pci_seg, 2699 PCI_BUS_NUM(e->devid_end), 2700 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), 2701 e->address_start, e->address_end, m->flags); 2702 2703 list_add_tail(&e->list, &pci_seg->unity_map); 2704 2705 return 0; 2706} 2707 2708/* iterates over all memory definitions we find in the ACPI table */ 2709static int __init init_memory_definitions(struct acpi_table_header *table) 2710{ 2711 u8 *p = (u8 *)table, *end = (u8 *)table; 2712 struct ivmd_header *m; 2713 2714 end += table->length; 2715 p += IVRS_HEADER_LENGTH; 2716 2717 while (p < end) { 2718 m = (struct ivmd_header *)p; 2719 if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) 2720 init_unity_map_range(m, table); 2721 2722 p += m->length; 2723 } 2724 2725 return 0; 2726} 2727 2728/* 2729 * Init the device table to not allow DMA access for devices 2730 */ 2731static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2732{ 2733 u32 devid; 2734 struct dev_table_entry *dev_table = pci_seg->dev_table; 2735 2736 if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE) 2737 return; 2738 2739 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2740 set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID); 2741 if (!amd_iommu_snp_en) 2742 set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION); 2743 } 2744} 2745 2746static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2747{ 2748 u32 devid; 2749 struct dev_table_entry *dev_table = pci_seg->dev_table; 2750 2751 if (dev_table == NULL) 2752 return; 2753 2754 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2755 dev_table[devid].data[0] = 0ULL; 2756 dev_table[devid].data[1] = 0ULL; 2757 } 2758} 2759 2760static void init_device_table(void) 2761{ 2762 struct amd_iommu_pci_seg *pci_seg; 2763 u32 devid; 2764 2765 if (!amd_iommu_irq_remap) 2766 return; 2767 2768 for_each_pci_segment(pci_seg) { 2769 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) 2770 set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN); 2771 } 2772} 2773 2774static void iommu_init_flags(struct amd_iommu *iommu) 2775{ 2776 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? 2777 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : 2778 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); 2779 2780 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? 2781 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : 2782 iommu_feature_disable(iommu, CONTROL_PASSPW_EN); 2783 2784 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? 2785 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : 2786 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); 2787 2788 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? 
2789 iommu_feature_enable(iommu, CONTROL_ISOC_EN) : 2790 iommu_feature_disable(iommu, CONTROL_ISOC_EN); 2791 2792 /* 2793 * make IOMMU memory accesses cache coherent 2794 */ 2795 iommu_feature_enable(iommu, CONTROL_COHERENT_EN); 2796 2797 /* Set IOTLB invalidation timeout to 1s */ 2798 iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT); 2799 2800 /* Enable Enhanced Peripheral Page Request Handling */ 2801 if (check_feature(FEATURE_EPHSUP)) 2802 iommu_feature_enable(iommu, CONTROL_EPH_EN); 2803} 2804 2805static void iommu_apply_resume_quirks(struct amd_iommu *iommu) 2806{ 2807 int i, j; 2808 u32 ioc_feature_control; 2809 struct pci_dev *pdev = iommu->root_pdev; 2810 2811 /* RD890 BIOSes may not have completely reconfigured the iommu */ 2812 if (!is_rd890_iommu(iommu->dev) || !pdev) 2813 return; 2814 2815 /* 2816 * First, we need to ensure that the iommu is enabled. This is 2817 * controlled by a register in the northbridge 2818 */ 2819 2820 /* Select Northbridge indirect register 0x75 and enable writing */ 2821 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); 2822 pci_read_config_dword(pdev, 0x64, &ioc_feature_control); 2823 2824 /* Enable the iommu */ 2825 if (!(ioc_feature_control & 0x1)) 2826 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); 2827 2828 /* Restore the iommu BAR */ 2829 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, 2830 iommu->stored_addr_lo); 2831 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, 2832 iommu->stored_addr_hi); 2833 2834 /* Restore the l1 indirect regs for each of the 6 l1s */ 2835 for (i = 0; i < 6; i++) 2836 for (j = 0; j < 0x12; j++) 2837 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); 2838 2839 /* Restore the l2 indirect regs */ 2840 for (i = 0; i < 0x83; i++) 2841 iommu_write_l2(iommu, i, iommu->stored_l2[i]); 2842 2843 /* Lock PCI setup registers */ 2844 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, 2845 iommu->stored_addr_lo | 1); 2846} 2847 2848static void iommu_enable_ga(struct amd_iommu *iommu) 2849{ 2850#ifdef CONFIG_IRQ_REMAP 2851 switch (amd_iommu_guest_ir) { 2852 case AMD_IOMMU_GUEST_IR_VAPIC: 2853 case AMD_IOMMU_GUEST_IR_LEGACY_GA: 2854 iommu_feature_enable(iommu, CONTROL_GA_EN); 2855 iommu->irte_ops = &irte_128_ops; 2856 break; 2857 default: 2858 iommu->irte_ops = &irte_32_ops; 2859 break; 2860 } 2861#endif 2862} 2863 2864static void iommu_disable_irtcachedis(struct amd_iommu *iommu) 2865{ 2866 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); 2867} 2868 2869static void iommu_enable_irtcachedis(struct amd_iommu *iommu) 2870{ 2871 u64 ctrl; 2872 2873 if (!amd_iommu_irtcachedis) 2874 return; 2875 2876 /* 2877 * Note: 2878 * The support for IRTCacheDis feature is dertermined by 2879 * checking if the bit is writable. 2880 */ 2881 iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS); 2882 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 2883 ctrl &= (1ULL << CONTROL_IRTCACHEDIS); 2884 if (ctrl) 2885 iommu->irtcachedis_enabled = true; 2886 pr_info("iommu%d (%#06x) : IRT cache is %s\n", 2887 iommu->index, iommu->devid, 2888 iommu->irtcachedis_enabled ? 
"disabled" : "enabled"); 2889} 2890 2891static void iommu_enable_2k_int(struct amd_iommu *iommu) 2892{ 2893 if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) 2894 return; 2895 2896 iommu_feature_set(iommu, 2897 CONTROL_NUM_INT_REMAP_MODE_2K, 2898 CONTROL_NUM_INT_REMAP_MODE_MASK, 2899 CONTROL_NUM_INT_REMAP_MODE); 2900} 2901 2902static void early_enable_iommu(struct amd_iommu *iommu) 2903{ 2904 iommu_disable(iommu); 2905 iommu_init_flags(iommu); 2906 iommu_set_device_table(iommu); 2907 iommu_enable_command_buffer(iommu); 2908 iommu_set_exclusion_range(iommu); 2909 iommu_enable_gt(iommu); 2910 iommu_enable_ga(iommu); 2911 iommu_enable_xt(iommu); 2912 iommu_enable_irtcachedis(iommu); 2913 iommu_enable_2k_int(iommu); 2914 iommu_enable(iommu); 2915 amd_iommu_flush_all_caches(iommu); 2916} 2917 2918/* 2919 * This function finally enables all IOMMUs found in the system after 2920 * they have been initialized. 2921 * 2922 * Or if in kdump kernel and IOMMUs are all pre-enabled, try to reuse 2923 * the old content of device table entries. Not this case or reuse failed, 2924 * just continue as normal kernel does. 2925 */ 2926static void early_enable_iommus(void) 2927{ 2928 struct amd_iommu *iommu; 2929 struct amd_iommu_pci_seg *pci_seg; 2930 2931 if (!reuse_device_table()) { 2932 /* 2933 * If come here because of failure in reusing device table from old 2934 * kernel with all IOMMUs enabled, print error message and try to 2935 * free allocated old_dev_tbl_cpy. 2936 */ 2937 if (amd_iommu_pre_enabled) { 2938 pr_err("Failed to reuse DEV table from previous kernel.\n"); 2939 /* 2940 * Bail out early if unable to remap/reuse DEV table from 2941 * previous kernel if SNP enabled as IOMMU commands will 2942 * time out without DEV table and cause kdump boot panic. 2943 */ 2944 BUG_ON(check_feature(FEATURE_SNP)); 2945 } 2946 2947 for_each_pci_segment(pci_seg) { 2948 if (pci_seg->old_dev_tbl_cpy != NULL) { 2949 memunmap((void *)pci_seg->old_dev_tbl_cpy); 2950 pci_seg->old_dev_tbl_cpy = NULL; 2951 } 2952 } 2953 2954 for_each_iommu(iommu) { 2955 clear_translation_pre_enabled(iommu); 2956 early_enable_iommu(iommu); 2957 } 2958 } else { 2959 pr_info("Reused DEV table from previous kernel.\n"); 2960 2961 for_each_pci_segment(pci_seg) { 2962 iommu_free_pages(pci_seg->dev_table); 2963 pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; 2964 } 2965 2966 for_each_iommu(iommu) { 2967 iommu_disable_command_buffer(iommu); 2968 iommu_disable_event_buffer(iommu); 2969 iommu_disable_irtcachedis(iommu); 2970 iommu_enable_command_buffer(iommu); 2971 iommu_enable_ga(iommu); 2972 iommu_enable_xt(iommu); 2973 iommu_enable_irtcachedis(iommu); 2974 iommu_enable_2k_int(iommu); 2975 iommu_set_device_table(iommu); 2976 amd_iommu_flush_all_caches(iommu); 2977 } 2978 } 2979} 2980 2981static void enable_iommus_ppr(void) 2982{ 2983 struct amd_iommu *iommu; 2984 2985 if (!amd_iommu_gt_ppr_supported()) 2986 return; 2987 2988 for_each_iommu(iommu) 2989 amd_iommu_enable_ppr_log(iommu); 2990} 2991 2992static void enable_iommus_vapic(void) 2993{ 2994#ifdef CONFIG_IRQ_REMAP 2995 u32 status, i; 2996 struct amd_iommu *iommu; 2997 2998 for_each_iommu(iommu) { 2999 /* 3000 * Disable GALog if already running. It could have been enabled 3001 * in the previous boot before kdump. 
3002 */ 3003 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 3004 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 3005 continue; 3006 3007 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 3008 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 3009 3010 /* 3011 * Need to set and poll check the GALOGRun bit to zero before 3012 * we can set/ modify GA Log registers safely. 3013 */ 3014 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { 3015 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 3016 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 3017 break; 3018 udelay(10); 3019 } 3020 3021 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) 3022 return; 3023 } 3024 3025 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && 3026 !check_feature(FEATURE_GAM_VAPIC)) { 3027 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 3028 return; 3029 } 3030 3031 if (amd_iommu_snp_en && 3032 !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) { 3033 pr_warn("Force to disable Virtual APIC due to SNP\n"); 3034 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 3035 return; 3036 } 3037 3038 /* Enabling GAM and SNPAVIC support */ 3039 for_each_iommu(iommu) { 3040 if (iommu_init_ga_log(iommu) || 3041 iommu_ga_log_enable(iommu)) 3042 return; 3043 3044 iommu_feature_enable(iommu, CONTROL_GAM_EN); 3045 if (amd_iommu_snp_en) 3046 iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN); 3047 } 3048 3049 amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); 3050 pr_info("Virtual APIC enabled\n"); 3051#endif 3052} 3053 3054static void disable_iommus(void) 3055{ 3056 struct amd_iommu *iommu; 3057 3058 for_each_iommu(iommu) 3059 iommu_disable(iommu); 3060 3061#ifdef CONFIG_IRQ_REMAP 3062 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 3063 amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP); 3064#endif 3065} 3066 3067/* 3068 * Suspend/Resume support 3069 * disable suspend until real resume implemented 3070 */ 3071 3072static void amd_iommu_resume(void *data) 3073{ 3074 struct amd_iommu *iommu; 3075 3076 for_each_iommu(iommu) 3077 iommu_apply_resume_quirks(iommu); 3078 3079 /* re-load the hardware */ 3080 for_each_iommu(iommu) 3081 early_enable_iommu(iommu); 3082 3083 iommu_enable_event_buffer(); 3084 amd_iommu_enable_interrupts(); 3085} 3086 3087static int amd_iommu_suspend(void *data) 3088{ 3089 /* disable IOMMUs to go out of the way for BIOS */ 3090 disable_iommus(); 3091 3092 return 0; 3093} 3094 3095static const struct syscore_ops amd_iommu_syscore_ops = { 3096 .suspend = amd_iommu_suspend, 3097 .resume = amd_iommu_resume, 3098}; 3099 3100static struct syscore amd_iommu_syscore = { 3101 .ops = &amd_iommu_syscore_ops, 3102}; 3103 3104static void __init free_iommu_resources(void) 3105{ 3106 free_iommu_all(); 3107 free_pci_segments(); 3108} 3109 3110/* SB IOAPIC is always on this device in AMD systems */ 3111#define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0)) 3112 3113static bool __init check_ioapic_information(void) 3114{ 3115 const char *fw_bug = FW_BUG; 3116 bool ret, has_sb_ioapic; 3117 int idx; 3118 3119 has_sb_ioapic = false; 3120 ret = false; 3121 3122 /* 3123 * If we have map overrides on the kernel command line the 3124 * messages in this function might not describe firmware bugs 3125 * anymore - so be careful 3126 */ 3127 if (cmdline_maps) 3128 fw_bug = ""; 3129 3130 for (idx = 0; idx < nr_ioapics; idx++) { 3131 int devid, id = mpc_ioapic_id(idx); 3132 3133 devid = get_ioapic_devid(id); 3134 if (devid < 0) { 3135 pr_err("%s: IOAPIC[%d] not in IVRS table\n", 3136 fw_bug, id); 3137 ret = false; 3138 } else if (devid == IOAPIC_SB_DEVID) { 
3139 has_sb_ioapic = true; 3140 ret = true; 3141 } 3142 } 3143 3144 if (!has_sb_ioapic) { 3145 /* 3146 * We expect the SB IOAPIC to be listed in the IVRS 3147 * table. The system timer is connected to the SB IOAPIC 3148 * and if we don't have it in the list the system will 3149 * panic at boot time. This situation usually happens 3150 * when the BIOS is buggy and provides us the wrong 3151 * device id for the IOAPIC in the system. 3152 */ 3153 pr_err("%s: No southbridge IOAPIC found\n", fw_bug); 3154 } 3155 3156 if (!ret) 3157 pr_err("Disabling interrupt remapping\n"); 3158 3159 return ret; 3160} 3161 3162static void __init free_dma_resources(void) 3163{ 3164 amd_iommu_pdom_id_destroy(); 3165 free_unity_maps(); 3166} 3167 3168static void __init ivinfo_init(void *ivrs) 3169{ 3170 amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET)); 3171} 3172 3173/* 3174 * This is the hardware init function for AMD IOMMU in the system. 3175 * This function is called either from amd_iommu_init or from the interrupt 3176 * remapping setup code. 3177 * 3178 * This function basically parses the ACPI table for AMD IOMMU (IVRS) 3179 * four times: 3180 * 3181 * 1 pass) Discover the most comprehensive IVHD type to use. 3182 * 3183 * 2 pass) Find the highest PCI device id the driver has to handle. 3184 * Upon this information the size of the data structures is 3185 * determined that needs to be allocated. 3186 * 3187 * 3 pass) Initialize the data structures just allocated with the 3188 * information in the ACPI table about available AMD IOMMUs 3189 * in the system. It also maps the PCI devices in the 3190 * system to specific IOMMUs 3191 * 3192 * 4 pass) After the basic data structures are allocated and 3193 * initialized we update them with information about memory 3194 * remapping requirements parsed out of the ACPI table in 3195 * this last pass. 3196 * 3197 * After everything is set up the IOMMUs are enabled and the necessary 3198 * hotplug and suspend notifiers are registered. 3199 */ 3200static int __init early_amd_iommu_init(void) 3201{ 3202 struct acpi_table_header *ivrs_base; 3203 int ret; 3204 acpi_status status; 3205 u8 efr_hats; 3206 3207 if (!amd_iommu_detected) 3208 return -ENODEV; 3209 3210 status = acpi_get_table("IVRS", 0, &ivrs_base); 3211 if (status == AE_NOT_FOUND) 3212 return -ENODEV; 3213 else if (ACPI_FAILURE(status)) { 3214 const char *err = acpi_format_exception(status); 3215 pr_err("IVRS table error: %s\n", err); 3216 return -EINVAL; 3217 } 3218 3219 if (!boot_cpu_has(X86_FEATURE_CX16)) { 3220 pr_err("Failed to initialize. 
The CMPXCHG16B feature is required.\n"); 3221 ret = -EINVAL; 3222 goto out; 3223 } 3224 3225 /* 3226 * Validate checksum here so we don't need to do it when 3227 * we actually parse the table 3228 */ 3229 ret = check_ivrs_checksum(ivrs_base); 3230 if (ret) 3231 goto out; 3232 3233 ivinfo_init(ivrs_base); 3234 3235 amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); 3236 DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); 3237 3238 /* 3239 * now the data structures are allocated and basically initialized 3240 * start the real acpi table scan 3241 */ 3242 ret = init_iommu_all(ivrs_base); 3243 if (ret) 3244 goto out; 3245 3246 /* 5 level guest page table */ 3247 if (cpu_feature_enabled(X86_FEATURE_LA57) && 3248 FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL) 3249 amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; 3250 3251 efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr); 3252 if (efr_hats != 0x3) { 3253 /* 3254 * efr[HATS] bits specify the maximum host translation level 3255 * supported, with LEVEL 4 being initial max level. 3256 */ 3257 amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL; 3258 } else { 3259 pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n", 3260 efr_hats); 3261 amd_iommu_hatdis = true; 3262 } 3263 3264 if (amd_iommu_pgtable == PD_MODE_V2) { 3265 if (!amd_iommu_v2_pgtbl_supported()) { 3266 pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); 3267 amd_iommu_pgtable = PD_MODE_V1; 3268 } 3269 } 3270 3271 if (amd_iommu_hatdis) { 3272 /* 3273 * Host (v1) page table is not available. Attempt to use 3274 * Guest (v2) page table. 3275 */ 3276 if (amd_iommu_v2_pgtbl_supported()) 3277 amd_iommu_pgtable = PD_MODE_V2; 3278 else 3279 amd_iommu_pgtable = PD_MODE_NONE; 3280 } 3281 3282 /* Disable any previously enabled IOMMUs */ 3283 if (!is_kdump_kernel() || amd_iommu_disabled) 3284 disable_iommus(); 3285 3286 if (amd_iommu_irq_remap) 3287 amd_iommu_irq_remap = check_ioapic_information(); 3288 3289 if (amd_iommu_irq_remap) { 3290 struct amd_iommu_pci_seg *pci_seg; 3291 ret = -ENOMEM; 3292 for_each_pci_segment(pci_seg) { 3293 if (alloc_irq_lookup_table(pci_seg)) 3294 goto out; 3295 } 3296 } 3297 3298 ret = init_memory_definitions(ivrs_base); 3299 if (ret) 3300 goto out; 3301 3302 /* init the device table */ 3303 init_device_table(); 3304 3305out: 3306 /* Don't leak any ACPI memory */ 3307 acpi_put_table(ivrs_base); 3308 3309 return ret; 3310} 3311 3312static int amd_iommu_enable_interrupts(void) 3313{ 3314 struct amd_iommu *iommu; 3315 int ret = 0; 3316 3317 for_each_iommu(iommu) { 3318 ret = iommu_init_irq(iommu); 3319 if (ret) 3320 goto out; 3321 } 3322 3323 /* 3324 * Interrupt handler is ready to process interrupts. Enable 3325 * PPR and GA log interrupt for all IOMMUs. 
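	 * enable_iommus_vapic() brings up the GA log, enable_iommus_ppr()
	 * the PPR log.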
3326 */ 3327 enable_iommus_vapic(); 3328 enable_iommus_ppr(); 3329 3330out: 3331 return ret; 3332} 3333 3334static bool __init detect_ivrs(void) 3335{ 3336 struct acpi_table_header *ivrs_base; 3337 acpi_status status; 3338 int i; 3339 3340 status = acpi_get_table("IVRS", 0, &ivrs_base); 3341 if (status == AE_NOT_FOUND) 3342 return false; 3343 else if (ACPI_FAILURE(status)) { 3344 const char *err = acpi_format_exception(status); 3345 pr_err("IVRS table error: %s\n", err); 3346 return false; 3347 } 3348 3349 acpi_put_table(ivrs_base); 3350 3351 if (amd_iommu_force_enable) 3352 goto out; 3353 3354 /* Don't use IOMMU if there is Stoney Ridge graphics */ 3355 for (i = 0; i < 32; i++) { 3356 u32 pci_id; 3357 3358 pci_id = read_pci_config(0, i, 0, 0); 3359 if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { 3360 pr_info("Disable IOMMU on Stoney Ridge\n"); 3361 return false; 3362 } 3363 } 3364 3365out: 3366 /* Make sure ACS will be enabled during PCI probe */ 3367 pci_request_acs(); 3368 3369 return true; 3370} 3371 3372static __init void iommu_snp_enable(void) 3373{ 3374#ifdef CONFIG_KVM_AMD_SEV 3375 if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) 3376 return; 3377 /* 3378 * The SNP support requires that IOMMU must be enabled, and is 3379 * configured with V1 page table (DTE[Mode] = 0 is not supported). 3380 */ 3381 if (no_iommu || iommu_default_passthrough()) { 3382 pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n"); 3383 goto disable_snp; 3384 } 3385 3386 if (amd_iommu_pgtable != PD_MODE_V1) { 3387 pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n"); 3388 goto disable_snp; 3389 } 3390 3391 amd_iommu_snp_en = check_feature(FEATURE_SNP); 3392 if (!amd_iommu_snp_en) { 3393 pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n"); 3394 goto disable_snp; 3395 } 3396 3397 /* 3398 * Enable host SNP support once SNP support is checked on IOMMU. 3399 */ 3400 if (snp_rmptable_init()) { 3401 pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); 3402 goto disable_snp; 3403 } 3404 3405 pr_info("IOMMU SNP support enabled.\n"); 3406 return; 3407 3408disable_snp: 3409 cc_platform_clear(CC_ATTR_HOST_SEV_SNP); 3410#endif 3411} 3412 3413static void amd_iommu_apply_erratum_snp(void) 3414{ 3415#ifdef CONFIG_KVM_AMD_SEV 3416 if (!amd_iommu_snp_en) 3417 return; 3418 3419 /* Errata fix for Family 0x19 */ 3420 if (boot_cpu_data.x86 != 0x19) 3421 return; 3422 3423 /* Set event log buffer size to max */ 3424 amd_iommu_evtlog_size = EVTLOG_SIZE_MAX; 3425 pr_info("Applying erratum: Increase Event log size to 0x%x\n", 3426 amd_iommu_evtlog_size); 3427 3428 /* 3429 * Set PPR log buffer size to max. 3430 * (Family 0x19, model < 0x10 doesn't support PPR when SNP is enabled). 
3431 */ 3432 if (boot_cpu_data.x86_model >= 0x10) { 3433 amd_iommu_pprlog_size = PPRLOG_SIZE_MAX; 3434 pr_info("Applying erratum: Increase PPR log size to 0x%x\n", 3435 amd_iommu_pprlog_size); 3436 } 3437#endif 3438} 3439 3440/**************************************************************************** 3441 * 3442 * AMD IOMMU Initialization State Machine 3443 * 3444 ****************************************************************************/ 3445 3446static int __init state_next(void) 3447{ 3448 int ret = 0; 3449 3450 switch (init_state) { 3451 case IOMMU_START_STATE: 3452 if (!detect_ivrs()) { 3453 init_state = IOMMU_NOT_FOUND; 3454 ret = -ENODEV; 3455 } else { 3456 init_state = IOMMU_IVRS_DETECTED; 3457 } 3458 break; 3459 case IOMMU_IVRS_DETECTED: 3460 if (amd_iommu_disabled) { 3461 init_state = IOMMU_CMDLINE_DISABLED; 3462 ret = -EINVAL; 3463 } else { 3464 ret = early_amd_iommu_init(); 3465 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; 3466 } 3467 break; 3468 case IOMMU_ACPI_FINISHED: 3469 early_enable_iommus(); 3470 x86_platform.iommu_shutdown = disable_iommus; 3471 init_state = IOMMU_ENABLED; 3472 break; 3473 case IOMMU_ENABLED: 3474 register_syscore(&amd_iommu_syscore); 3475 iommu_snp_enable(); 3476 3477 amd_iommu_apply_erratum_snp(); 3478 3479 /* Allocate/enable event log buffer */ 3480 if (is_kdump_kernel()) 3481 ret = remap_event_buffer(); 3482 else 3483 ret = alloc_event_buffer(); 3484 3485 if (ret) { 3486 init_state = IOMMU_INIT_ERROR; 3487 break; 3488 } 3489 iommu_enable_event_buffer(); 3490 3491 ret = amd_iommu_init_pci(); 3492 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; 3493 break; 3494 case IOMMU_PCI_INIT: 3495 ret = amd_iommu_enable_interrupts(); 3496 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN; 3497 break; 3498 case IOMMU_INTERRUPTS_EN: 3499 init_state = IOMMU_INITIALIZED; 3500 break; 3501 case IOMMU_INITIALIZED: 3502 /* Nothing to do */ 3503 break; 3504 case IOMMU_NOT_FOUND: 3505 case IOMMU_INIT_ERROR: 3506 case IOMMU_CMDLINE_DISABLED: 3507 /* Error states => do nothing */ 3508 ret = -EINVAL; 3509 break; 3510 default: 3511 /* Unknown state */ 3512 BUG(); 3513 } 3514 3515 if (ret) { 3516 free_dma_resources(); 3517 if (!irq_remapping_enabled) { 3518 disable_iommus(); 3519 free_iommu_resources(); 3520 } else { 3521 struct amd_iommu *iommu; 3522 struct amd_iommu_pci_seg *pci_seg; 3523 3524 for_each_pci_segment(pci_seg) 3525 uninit_device_table_dma(pci_seg); 3526 3527 for_each_iommu(iommu) 3528 amd_iommu_flush_all_caches(iommu); 3529 } 3530 } 3531 return ret; 3532} 3533 3534static int __init iommu_go_to_state(enum iommu_init_state state) 3535{ 3536 int ret = -EINVAL; 3537 3538 while (init_state != state) { 3539 if (init_state == IOMMU_NOT_FOUND || 3540 init_state == IOMMU_INIT_ERROR || 3541 init_state == IOMMU_CMDLINE_DISABLED) 3542 break; 3543 ret = state_next(); 3544 } 3545 3546 /* 3547 * SNP platform initilazation requires IOMMUs to be fully configured. 3548 * If the SNP support on IOMMUs has NOT been checked, simply mark SNP 3549 * as unsupported. If the SNP support on IOMMUs has been checked and 3550 * host SNP support enabled but RMP enforcement has not been enabled 3551 * in IOMMUs, then the system is in a half-baked state, but can limp 3552 * along as all memory should be Hypervisor-Owned in the RMP. WARN, 3553 * but leave SNP as "supported" to avoid confusing the kernel. 
3554 */ 3555 if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) && 3556 !WARN_ON_ONCE(amd_iommu_snp_en)) 3557 cc_platform_clear(CC_ATTR_HOST_SEV_SNP); 3558 3559 return ret; 3560} 3561 3562#ifdef CONFIG_IRQ_REMAP 3563int __init amd_iommu_prepare(void) 3564{ 3565 int ret; 3566 3567 amd_iommu_irq_remap = true; 3568 3569 ret = iommu_go_to_state(IOMMU_ACPI_FINISHED); 3570 if (ret) { 3571 amd_iommu_irq_remap = false; 3572 return ret; 3573 } 3574 3575 return amd_iommu_irq_remap ? 0 : -ENODEV; 3576} 3577 3578int __init amd_iommu_enable(void) 3579{ 3580 int ret; 3581 3582 ret = iommu_go_to_state(IOMMU_ENABLED); 3583 if (ret) 3584 return ret; 3585 3586 irq_remapping_enabled = 1; 3587 return amd_iommu_xt_mode; 3588} 3589 3590void amd_iommu_disable(void) 3591{ 3592 amd_iommu_suspend(NULL); 3593} 3594 3595int amd_iommu_reenable(int mode) 3596{ 3597 amd_iommu_resume(NULL); 3598 3599 return 0; 3600} 3601 3602int amd_iommu_enable_faulting(unsigned int cpu) 3603{ 3604 /* We enable MSI later when PCI is initialized */ 3605 return 0; 3606} 3607#endif 3608 3609/* 3610 * This is the core init function for AMD IOMMU hardware in the system. 3611 * This function is called from the generic x86 DMA layer initialization 3612 * code. 3613 */ 3614static int __init amd_iommu_init(void) 3615{ 3616 int ret; 3617 3618 ret = iommu_go_to_state(IOMMU_INITIALIZED); 3619#ifdef CONFIG_GART_IOMMU 3620 if (ret && list_empty(&amd_iommu_list)) { 3621 /* 3622 * We failed to initialize the AMD IOMMU - try fallback 3623 * to GART if possible. 3624 */ 3625 gart_iommu_init(); 3626 } 3627#endif 3628 3629 if (!ret) 3630 amd_iommu_debugfs_setup(); 3631 3632 return ret; 3633} 3634 3635static bool amd_iommu_sme_check(void) 3636{ 3637 if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) || 3638 (boot_cpu_data.x86 != 0x17)) 3639 return true; 3640 3641 /* For Fam17h, a specific level of support is required */ 3642 if (boot_cpu_data.microcode >= 0x08001205) 3643 return true; 3644 3645 if ((boot_cpu_data.microcode >= 0x08001126) && 3646 (boot_cpu_data.microcode <= 0x080011ff)) 3647 return true; 3648 3649 pr_notice("IOMMU not currently supported when SME is active\n"); 3650 3651 return false; 3652} 3653 3654/**************************************************************************** 3655 * 3656 * Early detect code. This code runs at IOMMU detection time in the DMA 3657 * layer. It just looks if there is an IVRS ACPI table to detect AMD 3658 * IOMMUs 3659 * 3660 ****************************************************************************/ 3661void __init amd_iommu_detect(void) 3662{ 3663 int ret; 3664 3665 if (no_iommu || (iommu_detected && !gart_iommu_aperture)) 3666 goto disable_snp; 3667 3668 if (!amd_iommu_sme_check()) 3669 goto disable_snp; 3670 3671 ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); 3672 if (ret) 3673 goto disable_snp; 3674 3675 amd_iommu_detected = true; 3676 iommu_detected = 1; 3677 x86_init.iommu.iommu_init = amd_iommu_init; 3678 return; 3679 3680disable_snp: 3681 if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) 3682 cc_platform_clear(CC_ATTR_HOST_SEV_SNP); 3683} 3684 3685/**************************************************************************** 3686 * 3687 * Parsing functions for the AMD IOMMU specific kernel command line 3688 * options. 
3689 * 3690 ****************************************************************************/ 3691 3692static int __init parse_amd_iommu_dump(char *str) 3693{ 3694 amd_iommu_dump = true; 3695 3696 return 1; 3697} 3698 3699static int __init parse_amd_iommu_intr(char *str) 3700{ 3701 for (; *str; ++str) { 3702 if (strncmp(str, "legacy", 6) == 0) { 3703 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 3704 break; 3705 } 3706 if (strncmp(str, "vapic", 5) == 0) { 3707 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; 3708 break; 3709 } 3710 } 3711 return 1; 3712} 3713 3714static int __init parse_amd_iommu_options(char *str) 3715{ 3716 if (!str) 3717 return -EINVAL; 3718 3719 while (*str) { 3720 if (strncmp(str, "fullflush", 9) == 0) { 3721 pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n"); 3722 iommu_set_dma_strict(); 3723 } else if (strncmp(str, "force_enable", 12) == 0) { 3724 amd_iommu_force_enable = true; 3725 } else if (strncmp(str, "off", 3) == 0) { 3726 amd_iommu_disabled = true; 3727 } else if (strncmp(str, "force_isolation", 15) == 0) { 3728 amd_iommu_force_isolation = true; 3729 } else if (strncmp(str, "pgtbl_v1", 8) == 0) { 3730 amd_iommu_pgtable = PD_MODE_V1; 3731 } else if (strncmp(str, "pgtbl_v2", 8) == 0) { 3732 amd_iommu_pgtable = PD_MODE_V2; 3733 } else if (strncmp(str, "irtcachedis", 11) == 0) { 3734 amd_iommu_irtcachedis = true; 3735 } else if (strncmp(str, "nohugepages", 11) == 0) { 3736 pr_info("Restricting V1 page-sizes to 4KiB"); 3737 amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_4K; 3738 } else if (strncmp(str, "v2_pgsizes_only", 15) == 0) { 3739 pr_info("Restricting V1 page-sizes to 4KiB/2MiB/1GiB"); 3740 amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; 3741 } else { 3742 pr_notice("Unknown option - '%s'\n", str); 3743 } 3744 3745 str += strcspn(str, ","); 3746 while (*str == ',') 3747 str++; 3748 } 3749 3750 return 1; 3751} 3752 3753static int __init parse_ivrs_ioapic(char *str) 3754{ 3755 u32 seg = 0, bus, dev, fn; 3756 int id, i; 3757 u32 devid; 3758 3759 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3760 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) 3761 goto found; 3762 3763 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3764 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { 3765 pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n", 3766 str, id, seg, bus, dev, fn); 3767 goto found; 3768 } 3769 3770 pr_err("Invalid command line: ivrs_ioapic%s\n", str); 3771 return 1; 3772 3773found: 3774 if (early_ioapic_map_size == EARLY_MAP_SIZE) { 3775 pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", 3776 str); 3777 return 1; 3778 } 3779 3780 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3781 3782 cmdline_maps = true; 3783 i = early_ioapic_map_size++; 3784 early_ioapic_map[i].id = id; 3785 early_ioapic_map[i].devid = devid; 3786 early_ioapic_map[i].cmd_line = true; 3787 3788 return 1; 3789} 3790 3791static int __init parse_ivrs_hpet(char *str) 3792{ 3793 u32 seg = 0, bus, dev, fn; 3794 int id, i; 3795 u32 devid; 3796 3797 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3798 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) 3799 goto found; 3800 3801 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3802 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { 3803 pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n", 3804 str, id, seg, bus, dev, fn); 
3805 goto found; 3806 } 3807 3808 pr_err("Invalid command line: ivrs_hpet%s\n", str); 3809 return 1; 3810 3811found: 3812 if (early_hpet_map_size == EARLY_MAP_SIZE) { 3813 pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", 3814 str); 3815 return 1; 3816 } 3817 3818 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3819 3820 cmdline_maps = true; 3821 i = early_hpet_map_size++; 3822 early_hpet_map[i].id = id; 3823 early_hpet_map[i].devid = devid; 3824 early_hpet_map[i].cmd_line = true; 3825 3826 return 1; 3827} 3828 3829#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN) 3830 3831static int __init parse_ivrs_acpihid(char *str) 3832{ 3833 u32 seg = 0, bus, dev, fn; 3834 char *hid, *uid, *p, *addr; 3835 char acpiid[ACPIID_LEN + 1] = { }; /* size with NULL terminator */ 3836 int i; 3837 3838 addr = strchr(str, '@'); 3839 if (!addr) { 3840 addr = strchr(str, '='); 3841 if (!addr) 3842 goto not_found; 3843 3844 ++addr; 3845 3846 if (strlen(addr) > ACPIID_LEN) 3847 goto not_found; 3848 3849 if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || 3850 sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { 3851 pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", 3852 str, acpiid, seg, bus, dev, fn); 3853 goto found; 3854 } 3855 goto not_found; 3856 } 3857 3858 /* We have the '@', make it the terminator to get just the acpiid */ 3859 *addr++ = 0; 3860 3861 if (strlen(str) > ACPIID_LEN) 3862 goto not_found; 3863 3864 if (sscanf(str, "=%s", acpiid) != 1) 3865 goto not_found; 3866 3867 if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 || 3868 sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4) 3869 goto found; 3870 3871not_found: 3872 pr_err("Invalid command line: ivrs_acpihid%s\n", str); 3873 return 1; 3874 3875found: 3876 p = acpiid; 3877 hid = strsep(&p, ":"); 3878 uid = p; 3879 3880 if (!hid || !(*hid) || !uid) { 3881 pr_err("Invalid command line: hid or uid\n"); 3882 return 1; 3883 } 3884 3885 /* 3886 * Ignore leading zeroes after ':', so e.g., AMDI0095:00 3887 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match 3888 */ 3889 while (*uid == '0' && *(uid + 1)) 3890 uid++; 3891 3892 if (strlen(hid) >= ACPIHID_HID_LEN) { 3893 pr_err("Invalid command line: hid is too long\n"); 3894 return 1; 3895 } else if (strlen(uid) >= ACPIHID_UID_LEN) { 3896 pr_err("Invalid command line: uid is too long\n"); 3897 return 1; 3898 } 3899 3900 i = early_acpihid_map_size++; 3901 memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); 3902 memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); 3903 early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3904 early_acpihid_map[i].cmd_line = true; 3905 3906 return 1; 3907} 3908 3909__setup("amd_iommu_dump", parse_amd_iommu_dump); 3910__setup("amd_iommu=", parse_amd_iommu_options); 3911__setup("amd_iommu_intr=", parse_amd_iommu_intr); 3912__setup("ivrs_ioapic", parse_ivrs_ioapic); 3913__setup("ivrs_hpet", parse_ivrs_hpet); 3914__setup("ivrs_acpihid", parse_ivrs_acpihid); 3915 3916bool amd_iommu_pasid_supported(void) 3917{ 3918 /* CPU page table size should match IOMMU guest page table size */ 3919 if (cpu_feature_enabled(X86_FEATURE_LA57) && 3920 amd_iommu_gpt_level != PAGE_MODE_5_LEVEL) 3921 return false; 3922 3923 /* 3924 * Since DTE[Mode]=0 is prohibited on SNP-enabled system 3925 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without 3926 * setting up IOMMUv1 page table. 
3927 */ 3928 return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en; 3929} 3930 3931struct amd_iommu *get_amd_iommu(unsigned int idx) 3932{ 3933 unsigned int i = 0; 3934 struct amd_iommu *iommu; 3935 3936 for_each_iommu(iommu) 3937 if (i++ == idx) 3938 return iommu; 3939 return NULL; 3940} 3941 3942/**************************************************************************** 3943 * 3944 * IOMMU EFR Performance Counter support functionality. This code allows 3945 * access to the IOMMU PC functionality. 3946 * 3947 ****************************************************************************/ 3948 3949u8 amd_iommu_pc_get_max_banks(unsigned int idx) 3950{ 3951 struct amd_iommu *iommu = get_amd_iommu(idx); 3952 3953 if (iommu) 3954 return iommu->max_banks; 3955 3956 return 0; 3957} 3958 3959bool amd_iommu_pc_supported(void) 3960{ 3961 return amd_iommu_pc_present; 3962} 3963 3964u8 amd_iommu_pc_get_max_counters(unsigned int idx) 3965{ 3966 struct amd_iommu *iommu = get_amd_iommu(idx); 3967 3968 if (iommu) 3969 return iommu->max_counters; 3970 3971 return 0; 3972} 3973 3974static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, 3975 u8 fxn, u64 *value, bool is_write) 3976{ 3977 u32 offset; 3978 u32 max_offset_lim; 3979 3980 /* Make sure the IOMMU PC resource is available */ 3981 if (!amd_iommu_pc_present) 3982 return -ENODEV; 3983 3984 /* Check for valid iommu and pc register indexing */ 3985 if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7))) 3986 return -ENODEV; 3987 3988 offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn); 3989 3990 /* Limit the offset to the hw defined mmio region aperture */ 3991 max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) | 3992 (iommu->max_counters << 8) | 0x28); 3993 if ((offset < MMIO_CNTR_REG_OFFSET) || 3994 (offset > max_offset_lim)) 3995 return -EINVAL; 3996 3997 if (is_write) { 3998 u64 val = *value & GENMASK_ULL(47, 0); 3999 4000 writel((u32)val, iommu->mmio_base + offset); 4001 writel((val >> 32), iommu->mmio_base + offset + 4); 4002 } else { 4003 *value = readl(iommu->mmio_base + offset + 4); 4004 *value <<= 32; 4005 *value |= readl(iommu->mmio_base + offset); 4006 *value &= GENMASK_ULL(47, 0); 4007 } 4008 4009 return 0; 4010} 4011 4012int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 4013{ 4014 if (!iommu) 4015 return -EINVAL; 4016 4017 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false); 4018} 4019 4020int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 4021{ 4022 if (!iommu) 4023 return -EINVAL; 4024 4025 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); 4026} 4027 4028#ifdef CONFIG_KVM_AMD_SEV 4029static int iommu_page_make_shared(void *page) 4030{ 4031 unsigned long paddr, pfn; 4032 4033 paddr = iommu_virt_to_phys(page); 4034 /* Cbit maybe set in the paddr */ 4035 pfn = __sme_clr(paddr) >> PAGE_SHIFT; 4036 4037 if (!(pfn % PTRS_PER_PMD)) { 4038 int ret, level; 4039 bool assigned; 4040 4041 ret = snp_lookup_rmpentry(pfn, &assigned, &level); 4042 if (ret) { 4043 pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret); 4044 return ret; 4045 } 4046 4047 if (!assigned) { 4048 pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn); 4049 return -EINVAL; 4050 } 4051 4052 if (level > PG_LEVEL_4K) { 4053 ret = psmash(pfn); 4054 if (!ret) 4055 goto done; 4056 4057 pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n", 4058 pfn, ret, level); 4059 return ret; 4060 } 4061 } 4062 4063done: 4064 
return rmp_make_shared(pfn, PG_LEVEL_4K); 4065} 4066 4067static int iommu_make_shared(void *va, size_t size) 4068{ 4069 void *page; 4070 int ret; 4071 4072 if (!va) 4073 return 0; 4074 4075 for (page = va; page < (va + size); page += PAGE_SIZE) { 4076 ret = iommu_page_make_shared(page); 4077 if (ret) 4078 return ret; 4079 } 4080 4081 return 0; 4082} 4083 4084int amd_iommu_snp_disable(void) 4085{ 4086 struct amd_iommu *iommu; 4087 int ret; 4088 4089 if (!amd_iommu_snp_en) 4090 return 0; 4091 4092 for_each_iommu(iommu) { 4093 ret = iommu_make_shared(iommu->evt_buf, amd_iommu_evtlog_size); 4094 if (ret) 4095 return ret; 4096 4097 ret = iommu_make_shared(iommu->ppr_log, amd_iommu_pprlog_size); 4098 if (ret) 4099 return ret; 4100 4101 ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE); 4102 if (ret) 4103 return ret; 4104 } 4105 4106 return 0; 4107} 4108EXPORT_SYMBOL_GPL(amd_iommu_snp_disable); 4109 4110bool amd_iommu_sev_tio_supported(void) 4111{ 4112 return check_feature2(FEATURE_SEVSNPIO_SUP); 4113} 4114EXPORT_SYMBOL_GPL(amd_iommu_sev_tio_supported); 4115#endif