Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

powerpc/iommu: Reimplement the iommu_table_group_ops for pSeries

PPC64 IOMMU API defines iommu_table_group_ops which handles DMA
windows for PEs, their ownership transfer, create/set/unset the TCE
tables for the Dynamic DMA windows (DDW). VFIO uses these APIs for
support on POWER.

The commit 9d67c9433509 ("powerpc/iommu: Add "borrowing"
iommu_table_group_ops") implemented partial support for this API with
a "borrow" mechanism wherein the DMA windows, if already created by the
host driver, would be available for VFIO to use. Also, it didn't
have the support to control/modify the window size or the IO page
size.

The current patch implements all the necessary iommu_table_group_ops
APIs, thereby avoiding the "borrowing". So, just the way it is on the
PowerNV platform, with this patch the iommu table group ownership is
transferred to the VFIO PPC subdriver, and the iommu table and DMA window
creation/deletion are all driven through the APIs.

The pSeries uses the query-pe-dma-window, create-pe-dma-window and
reset-pe-dma-window RTAS calls for DMA window creation, deletion and
reset to default. The RTAS calls do show some minor differences to the
way things are to be handled on the pSeries which are listed below.

* On pSeries, the default DMA window size is "fixed" and cannot be custom
sized as requested by the user. For non-SRIOV VFs, it is fixed at 2GB
and for SRIOV VFs, it is variable sized based on the capacity assigned
to it during the VF assignment to the LPAR. So, for the default DMA
window alone, if the requested size is less than tce32_size, the smaller
size is enforced using the iommu table->it_size.

* The DMA start address for 32-bit window is 0, and for the 64-bit
window in case of PowerNV is hardcoded to TVE select (bit 59) at 512PiB
offset. This address is returned at the time of create_table() API call
(even before the window is created), the subsequent set_window() call
actually opens the DMA window. On pSeries, the DMA start address for
32-bit window is known from the 'ibm,dma-window' DT property. However,
the 64-bit window start address is not known until the create-pe-dma
RTAS call is made. So, the create_table() which returns the DMA window
start address actually opens the DMA window and returns the DMA start
address as returned by the Hypervisor for the create-pe-dma RTAS call.

* The reset-pe-dma RTAS call resets the DMA windows and restores the
default DMA window, however it does not clear the TCE table entries
if there are any. In case of ownership transfer from platform domain
which used direct mapping, the patch chooses remove-pe-dma instead of
reset-pe for the 64-bit window intentionally so that
clean_dma_window() is called.

Other than the DMA window management changes mentioned above, the
patch also brings back the userspace view for the single level TCE
as it existed before commit 090bad39b237a ("powerpc/powernv: Add
indirect levels to it_userspace") along with the relevant
refactoring.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/171923275958.1397.907964437142542242.stgit@linux.ibm.com

authored by

Shivaprasad G Bhat and committed by
Michael Ellerman
f431a8cd 35146ead

+548 -103
+2 -2
arch/powerpc/include/asm/iommu.h
··· 183 183 long (*unset_window)(struct iommu_table_group *table_group, 184 184 int num); 185 185 /* Switch ownership from platform code to external user (e.g. VFIO) */ 186 - long (*take_ownership)(struct iommu_table_group *table_group); 186 + long (*take_ownership)(struct iommu_table_group *table_group, struct device *dev); 187 187 /* Switch ownership from external user (e.g. VFIO) back to core */ 188 - void (*release_ownership)(struct iommu_table_group *table_group); 188 + void (*release_ownership)(struct iommu_table_group *table_group, struct device *dev); 189 189 }; 190 190 191 191 struct iommu_table_group_link {
+2 -2
arch/powerpc/kernel/iommu.c
··· 1171 1171 * The domain being set to PLATFORM from earlier 1172 1172 * BLOCKED. The table_group ownership has to be released. 1173 1173 */ 1174 - table_group->ops->release_ownership(table_group); 1174 + table_group->ops->release_ownership(table_group, dev); 1175 1175 iommu_group_put(grp); 1176 1176 1177 1177 return 0; ··· 1199 1199 * also sets the dma_api ops 1200 1200 */ 1201 1201 table_group = iommu_group_get_iommudata(grp); 1202 - ret = table_group->ops->take_ownership(table_group); 1202 + ret = table_group->ops->take_ownership(table_group, dev); 1203 1203 iommu_group_put(grp); 1204 1204 1205 1205 return ret;
+4 -2
arch/powerpc/platforms/powernv/pci-ioda.c
··· 1537 1537 } 1538 1538 } 1539 1539 1540 - static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group) 1540 + static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group, 1541 + struct device *dev __maybe_unused) 1541 1542 { 1542 1543 struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, 1543 1544 table_group); ··· 1563 1562 return 0; 1564 1563 } 1565 1564 1566 - static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) 1565 + static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group, 1566 + struct device *dev __maybe_unused) 1567 1567 { 1568 1568 struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, 1569 1569 table_group);
+540 -97
arch/powerpc/platforms/pseries/iommu.c
··· 21 21 #include <linux/dma-mapping.h> 22 22 #include <linux/crash_dump.h> 23 23 #include <linux/memory.h> 24 + #include <linux/vmalloc.h> 24 25 #include <linux/of.h> 25 26 #include <linux/of_address.h> 26 27 #include <linux/iommu.h> ··· 54 53 DDW_EXT_RESET_DMA_WIN = 1, 55 54 DDW_EXT_QUERY_OUT_SIZE = 2 56 55 }; 57 - 58 - static int iommu_take_ownership(struct iommu_table *tbl) 59 - { 60 - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; 61 - int ret = 0; 62 - 63 - /* 64 - * VFIO does not control TCE entries allocation and the guest 65 - * can write new TCEs on top of existing ones so iommu_tce_build() 66 - * must be able to release old pages. This functionality 67 - * requires exchange() callback defined so if it is not 68 - * implemented, we disallow taking ownership over the table. 69 - */ 70 - if (!tbl->it_ops->xchg_no_kill) 71 - return -EINVAL; 72 - 73 - spin_lock_irqsave(&tbl->large_pool.lock, flags); 74 - for (i = 0; i < tbl->nr_pools; i++) 75 - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); 76 - 77 - if (iommu_table_in_use(tbl)) { 78 - pr_err("iommu_tce: it_map is not empty"); 79 - ret = -EBUSY; 80 - } else { 81 - memset(tbl->it_map, 0xff, sz); 82 - } 83 - 84 - for (i = 0; i < tbl->nr_pools; i++) 85 - spin_unlock(&tbl->pools[i].lock); 86 - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); 87 - 88 - return ret; 89 - } 90 - 91 - static void iommu_release_ownership(struct iommu_table *tbl) 92 - { 93 - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; 94 - 95 - spin_lock_irqsave(&tbl->large_pool.lock, flags); 96 - for (i = 0; i < tbl->nr_pools; i++) 97 - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); 98 - 99 - memset(tbl->it_map, 0, sz); 100 - 101 - iommu_table_reserve_pages(tbl, tbl->it_reserved_start, 102 - tbl->it_reserved_end); 103 - 104 - for (i = 0; i < tbl->nr_pools; i++) 105 - spin_unlock(&tbl->pools[i].lock); 106 - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); 107 - } 108 56 109 57 static struct 
iommu_table *iommu_pseries_alloc_table(int node) 110 58 { ··· 146 196 } 147 197 148 198 149 - static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) 199 + static void tce_clear_pSeries(struct iommu_table *tbl, long index, long npages) 150 200 { 151 201 __be64 *tcep; 152 202 ··· 163 213 tcep = ((__be64 *)tbl->it_base) + index; 164 214 165 215 return be64_to_cpu(*tcep); 216 + } 217 + 218 + static long pseries_tce_iommu_userspace_view_alloc(struct iommu_table *tbl) 219 + { 220 + unsigned long cb = ALIGN(sizeof(tbl->it_userspace[0]) * tbl->it_size, PAGE_SIZE); 221 + unsigned long *uas; 222 + 223 + if (tbl->it_indirect_levels) /* Impossible */ 224 + return -EPERM; 225 + 226 + WARN_ON(tbl->it_userspace); 227 + 228 + uas = vzalloc(cb); 229 + if (!uas) 230 + return -ENOMEM; 231 + 232 + tbl->it_userspace = (__be64 *) uas; 233 + 234 + return 0; 235 + } 236 + 237 + static void tce_iommu_userspace_view_free(struct iommu_table *tbl) 238 + { 239 + vfree(tbl->it_userspace); 240 + tbl->it_userspace = NULL; 241 + } 242 + 243 + static void tce_free_pSeries(struct iommu_table *tbl) 244 + { 245 + if (!tbl->it_userspace) 246 + tce_iommu_userspace_view_free(tbl); 166 247 } 167 248 168 249 static void tce_free_pSeriesLP(unsigned long liobn, long, long, long); ··· 610 629 611 630 struct iommu_table_ops iommu_table_pseries_ops = { 612 631 .set = tce_build_pSeries, 613 - .clear = tce_free_pSeries, 632 + .clear = tce_clear_pSeries, 614 633 .get = tce_get_pseries 615 634 }; 616 635 ··· 719 738 720 739 return rc; 721 740 } 741 + 742 + static __be64 *tce_useraddr_pSeriesLP(struct iommu_table *tbl, long index, 743 + bool __always_unused alloc) 744 + { 745 + return tbl->it_userspace ? 
&tbl->it_userspace[index - tbl->it_offset] : NULL; 746 + } 722 747 #endif 723 748 724 749 struct iommu_table_ops iommu_table_lpar_multi_ops = { 725 750 .set = tce_buildmulti_pSeriesLP, 726 751 #ifdef CONFIG_IOMMU_API 727 752 .xchg_no_kill = tce_exchange_pseries, 753 + .useraddrptr = tce_useraddr_pSeriesLP, 728 754 #endif 729 755 .clear = tce_freemulti_pSeriesLP, 730 - .get = tce_get_pSeriesLP 756 + .get = tce_get_pSeriesLP, 757 + .free = tce_free_pSeries 731 758 }; 759 + 760 + /* 761 + * When the DMA window properties might have been removed, 762 + * the parent node has the table_group setup on it. 763 + */ 764 + static struct device_node *pci_dma_find_parent_node(struct pci_dev *dev, 765 + struct iommu_table_group *table_group) 766 + { 767 + struct device_node *dn = pci_device_to_OF_node(dev); 768 + struct pci_dn *rpdn; 769 + 770 + for (; dn && PCI_DN(dn); dn = dn->parent) { 771 + rpdn = PCI_DN(dn); 772 + 773 + if (table_group == rpdn->table_group) 774 + return dn; 775 + } 776 + 777 + return NULL; 778 + } 732 779 733 780 /* 734 781 * Find nearest ibm,dma-window (default DMA window) or direct DMA window or ··· 972 963 } 973 964 974 965 static void remove_dma_window(struct device_node *np, u32 *ddw_avail, 975 - struct property *win) 966 + struct property *win, bool cleanup) 976 967 { 977 968 struct dynamic_dma_window_prop *dwp; 978 969 u64 liobn; ··· 980 971 dwp = win->value; 981 972 liobn = (u64)be32_to_cpu(dwp->liobn); 982 973 983 - clean_dma_window(np, dwp); 974 + if (cleanup) 975 + clean_dma_window(np, dwp); 984 976 __remove_dma_window(np, ddw_avail, liobn); 985 977 } 986 978 987 - static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name) 979 + static void copy_property(struct device_node *pdn, const char *from, const char *to) 980 + { 981 + struct property *src, *dst; 982 + 983 + src = of_find_property(pdn, from, NULL); 984 + if (!src) 985 + return; 986 + 987 + dst = kzalloc(sizeof(*dst), GFP_KERNEL); 988 + if (!dst) 989 + return; 
990 + 991 + dst->name = kstrdup(to, GFP_KERNEL); 992 + dst->value = kmemdup(src->value, src->length, GFP_KERNEL); 993 + dst->length = src->length; 994 + if (!dst->name || !dst->value) 995 + return; 996 + 997 + if (of_add_property(pdn, dst)) { 998 + pr_err("Unable to add DMA window property for %pOF", pdn); 999 + goto free_prop; 1000 + } 1001 + 1002 + return; 1003 + 1004 + free_prop: 1005 + kfree(dst->name); 1006 + kfree(dst->value); 1007 + kfree(dst); 1008 + } 1009 + 1010 + static int remove_dma_window_named(struct device_node *np, bool remove_prop, const char *win_name, 1011 + bool cleanup) 988 1012 { 989 1013 struct property *win; 990 1014 u32 ddw_avail[DDW_APPLICABLE_SIZE]; ··· 1032 990 if (ret) 1033 991 return 0; 1034 992 1035 - 1036 993 if (win->length >= sizeof(struct dynamic_dma_window_prop)) 1037 - remove_dma_window(np, ddw_avail, win); 994 + remove_dma_window(np, ddw_avail, win, cleanup); 1038 995 1039 996 if (!remove_prop) 1040 997 return 0; 998 + 999 + /* Default window property if removed is lost as reset-pe doesn't restore it. 1000 + * Though FDT has a copy of it, the DLPAR hotplugged devices will not have a 1001 + * node on FDT until next reboot. So, back it up. 
1002 + */ 1003 + if ((strcmp(win_name, "ibm,dma-window") == 0) && 1004 + !of_find_property(np, "ibm,dma-window-saved", NULL)) 1005 + copy_property(np, win_name, "ibm,dma-window-saved"); 1041 1006 1042 1007 ret = of_remove_property(np, win); 1043 1008 if (ret) ··· 1103 1054 for_each_node_with_property(pdn, name) { 1104 1055 dma64 = of_get_property(pdn, name, &len); 1105 1056 if (!dma64 || len < sizeof(*dma64)) { 1106 - remove_ddw(pdn, true, name); 1057 + remove_dma_window_named(pdn, true, name, true); 1107 1058 continue; 1108 1059 } 1109 1060 ··· 1481 1432 if (reset_win_ext) 1482 1433 goto out_failed; 1483 1434 1484 - remove_dma_window(pdn, ddw_avail, default_win); 1435 + remove_dma_window(pdn, ddw_avail, default_win, true); 1485 1436 default_win_removed = true; 1486 1437 1487 1438 /* Query again, to check if the window is available */ ··· 1653 1604 1654 1605 if (default_win_removed) { 1655 1606 /* default_win is valid here because default_win_removed == true */ 1607 + if (!of_find_property(pdn, "ibm,dma-window-saved", NULL)) 1608 + copy_property(pdn, "ibm,dma-window", "ibm,dma-window-saved"); 1656 1609 of_remove_property(pdn, default_win); 1657 1610 dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn); 1658 1611 } ··· 1862 1811 return size; 1863 1812 } 1864 1813 1814 + static struct pci_dev *iommu_group_get_first_pci_dev(struct iommu_group *group) 1815 + { 1816 + struct pci_dev *pdev = NULL; 1817 + int ret; 1818 + 1819 + /* No IOMMU group ? 
*/ 1820 + if (!group) 1821 + return NULL; 1822 + 1823 + ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); 1824 + if (!ret || !pdev) 1825 + return NULL; 1826 + return pdev; 1827 + } 1828 + 1829 + static void restore_default_dma_window(struct pci_dev *pdev, struct device_node *pdn) 1830 + { 1831 + reset_dma_window(pdev, pdn); 1832 + copy_property(pdn, "ibm,dma-window-saved", "ibm,dma-window"); 1833 + } 1834 + 1835 + static long remove_dynamic_dma_windows(struct pci_dev *pdev, struct device_node *pdn) 1836 + { 1837 + struct pci_dn *pci = PCI_DN(pdn); 1838 + struct dma_win *window; 1839 + bool direct_mapping; 1840 + int len; 1841 + 1842 + if (find_existing_ddw(pdn, &pdev->dev.archdata.dma_offset, &len, &direct_mapping)) { 1843 + remove_dma_window_named(pdn, true, direct_mapping ? 1844 + DIRECT64_PROPNAME : DMA64_PROPNAME, true); 1845 + if (!direct_mapping) { 1846 + WARN_ON(!pci->table_group->tables[0] && !pci->table_group->tables[1]); 1847 + 1848 + if (pci->table_group->tables[1]) { 1849 + iommu_tce_table_put(pci->table_group->tables[1]); 1850 + pci->table_group->tables[1] = NULL; 1851 + } else if (pci->table_group->tables[0]) { 1852 + /* Default window was removed and only the DDW exists */ 1853 + iommu_tce_table_put(pci->table_group->tables[0]); 1854 + pci->table_group->tables[0] = NULL; 1855 + } 1856 + } 1857 + spin_lock(&dma_win_list_lock); 1858 + list_for_each_entry(window, &dma_win_list, list) { 1859 + if (window->device == pdn) { 1860 + list_del(&window->list); 1861 + kfree(window); 1862 + break; 1863 + } 1864 + } 1865 + spin_unlock(&dma_win_list_lock); 1866 + } 1867 + 1868 + return 0; 1869 + } 1870 + 1871 + static long pseries_setup_default_iommu_config(struct iommu_table_group *table_group, 1872 + struct device *dev) 1873 + { 1874 + struct pci_dev *pdev = to_pci_dev(dev); 1875 + const __be32 *default_prop; 1876 + long liobn, offset, size; 1877 + struct device_node *pdn; 1878 + struct iommu_table *tbl; 1879 + struct pci_dn *pci; 1880 + 1881 + 
pdn = pci_dma_find_parent_node(pdev, table_group); 1882 + if (!pdn || !PCI_DN(pdn)) { 1883 + dev_warn(&pdev->dev, "No table_group configured for the node %pOF\n", pdn); 1884 + return -1; 1885 + } 1886 + pci = PCI_DN(pdn); 1887 + 1888 + /* The default window is restored if not present already on removal of DDW. 1889 + * However, if used by VFIO SPAPR sub driver, the user's order of removal of 1890 + * windows might have been different to not leading to auto restoration, 1891 + * suppose the DDW was removed first followed by the default one. 1892 + * So, restore the default window with reset-pe-dma call explicitly. 1893 + */ 1894 + restore_default_dma_window(pdev, pdn); 1895 + 1896 + default_prop = of_get_property(pdn, "ibm,dma-window", NULL); 1897 + of_parse_dma_window(pdn, default_prop, &liobn, &offset, &size); 1898 + tbl = iommu_pseries_alloc_table(pci->phb->node); 1899 + if (!tbl) { 1900 + dev_err(&pdev->dev, "couldn't create new IOMMU table\n"); 1901 + return -1; 1902 + } 1903 + 1904 + iommu_table_setparms_common(tbl, pci->phb->bus->number, liobn, offset, 1905 + size, IOMMU_PAGE_SHIFT_4K, NULL, 1906 + &iommu_table_lpar_multi_ops); 1907 + iommu_init_table(tbl, pci->phb->node, 0, 0); 1908 + 1909 + pci->table_group->tables[0] = tbl; 1910 + set_iommu_table_base(&pdev->dev, tbl); 1911 + 1912 + return 0; 1913 + } 1914 + 1915 + static bool is_default_window_request(struct iommu_table_group *table_group, __u32 page_shift, 1916 + __u64 window_size) 1917 + { 1918 + if ((window_size <= table_group->tce32_size) && 1919 + (page_shift == IOMMU_PAGE_SHIFT_4K)) 1920 + return true; 1921 + 1922 + return false; 1923 + } 1924 + 1865 1925 static long spapr_tce_create_table(struct iommu_table_group *table_group, int num, 1866 1926 __u32 page_shift, __u64 window_size, __u32 levels, 1867 1927 struct iommu_table **ptbl) 1868 1928 { 1869 - struct iommu_table *tbl = table_group->tables[0]; 1929 + struct pci_dev *pdev = iommu_group_get_first_pci_dev(table_group->group); 1930 + u32 
ddw_avail[DDW_APPLICABLE_SIZE]; 1931 + struct ddw_create_response create; 1932 + unsigned long liobn, offset, size; 1933 + unsigned long start = 0, end = 0; 1934 + struct ddw_query_response query; 1935 + const __be32 *default_prop; 1936 + struct failed_ddw_pdn *fpdn; 1937 + unsigned int window_shift; 1938 + struct device_node *pdn; 1939 + struct iommu_table *tbl; 1940 + struct dma_win *window; 1941 + struct property *win64; 1942 + struct pci_dn *pci; 1943 + u64 win_addr; 1944 + int len, i; 1945 + long ret; 1870 1946 1871 - if (num > 0) 1872 - return -EPERM; 1873 - 1874 - if (tbl->it_page_shift != page_shift || 1875 - tbl->it_size != (window_size >> page_shift) || 1876 - tbl->it_indirect_levels != levels - 1) 1947 + if (!is_power_of_2(window_size) || levels > 1) 1877 1948 return -EINVAL; 1878 1949 1950 + window_shift = order_base_2(window_size); 1951 + 1952 + mutex_lock(&dma_win_init_mutex); 1953 + 1954 + ret = -ENODEV; 1955 + 1956 + pdn = pci_dma_find_parent_node(pdev, table_group); 1957 + if (!pdn || !PCI_DN(pdn)) { /* Niether of 32s|64-bit exist! */ 1958 + dev_warn(&pdev->dev, "No dma-windows exist for the node %pOF\n", pdn); 1959 + goto out_failed; 1960 + } 1961 + pci = PCI_DN(pdn); 1962 + 1963 + /* If the enable DDW failed for the pdn, dont retry! */ 1964 + list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) { 1965 + if (fpdn->pdn == pdn) { 1966 + dev_info(&pdev->dev, "%pOF in failed DDW device list\n", pdn); 1967 + goto out_unlock; 1968 + } 1969 + } 1970 + 1971 + tbl = iommu_pseries_alloc_table(pci->phb->node); 1972 + if (!tbl) { 1973 + dev_dbg(&pdev->dev, "couldn't create new IOMMU table\n"); 1974 + goto out_unlock; 1975 + } 1976 + 1977 + if (num == 0) { 1978 + bool direct_mapping; 1979 + /* The request is not for default window? 
Ensure there is no DDW window already */ 1980 + if (!is_default_window_request(table_group, page_shift, window_size)) { 1981 + if (find_existing_ddw(pdn, &pdev->dev.archdata.dma_offset, &len, 1982 + &direct_mapping)) { 1983 + dev_warn(&pdev->dev, "%pOF: 64-bit window already present.", pdn); 1984 + ret = -EPERM; 1985 + goto out_unlock; 1986 + } 1987 + } else { 1988 + /* Request is for Default window, ensure there is no DDW if there is a 1989 + * need to reset. reset-pe otherwise removes the DDW also 1990 + */ 1991 + default_prop = of_get_property(pdn, "ibm,dma-window", NULL); 1992 + if (!default_prop) { 1993 + if (find_existing_ddw(pdn, &pdev->dev.archdata.dma_offset, &len, 1994 + &direct_mapping)) { 1995 + dev_warn(&pdev->dev, "%pOF: Attempt to create window#0 when 64-bit window is present. Preventing the attempt as that would destroy the 64-bit window", 1996 + pdn); 1997 + ret = -EPERM; 1998 + goto out_unlock; 1999 + } 2000 + 2001 + restore_default_dma_window(pdev, pdn); 2002 + 2003 + default_prop = of_get_property(pdn, "ibm,dma-window", NULL); 2004 + of_parse_dma_window(pdn, default_prop, &liobn, &offset, &size); 2005 + /* Limit the default window size to window_size */ 2006 + iommu_table_setparms_common(tbl, pci->phb->bus->number, liobn, 2007 + offset, 1UL << window_shift, 2008 + IOMMU_PAGE_SHIFT_4K, NULL, 2009 + &iommu_table_lpar_multi_ops); 2010 + iommu_init_table(tbl, pci->phb->node, start, end); 2011 + 2012 + table_group->tables[0] = tbl; 2013 + 2014 + mutex_unlock(&dma_win_init_mutex); 2015 + 2016 + goto exit; 2017 + } 2018 + } 2019 + } 2020 + 2021 + ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", 2022 + &ddw_avail[0], DDW_APPLICABLE_SIZE); 2023 + if (ret) { 2024 + dev_info(&pdev->dev, "ibm,ddw-applicable not found\n"); 2025 + goto out_failed; 2026 + } 2027 + ret = -ENODEV; 2028 + 2029 + pr_err("%s: Calling query %pOF\n", __func__, pdn); 2030 + ret = query_ddw(pdev, ddw_avail, &query, pdn); 2031 + if (ret) 2032 + goto out_failed; 2033 + ret = 
-ENODEV; 2034 + 2035 + len = window_shift; 2036 + if (query.largest_available_block < (1ULL << (len - page_shift))) { 2037 + dev_dbg(&pdev->dev, "can't map window 0x%llx with %llu %llu-sized pages\n", 2038 + 1ULL << len, query.largest_available_block, 2039 + 1ULL << page_shift); 2040 + ret = -EINVAL; /* Retry with smaller window size */ 2041 + goto out_unlock; 2042 + } 2043 + 2044 + if (create_ddw(pdev, ddw_avail, &create, page_shift, len)) { 2045 + pr_err("%s: Create ddw failed %pOF\n", __func__, pdn); 2046 + goto out_failed; 2047 + } 2048 + 2049 + win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; 2050 + win64 = ddw_property_create(DMA64_PROPNAME, create.liobn, win_addr, page_shift, len); 2051 + if (!win64) 2052 + goto remove_window; 2053 + 2054 + ret = of_add_property(pdn, win64); 2055 + if (ret) { 2056 + dev_err(&pdev->dev, "unable to add DMA window property for %pOF: %ld", pdn, ret); 2057 + goto free_property; 2058 + } 2059 + ret = -ENODEV; 2060 + 2061 + window = ddw_list_new_entry(pdn, win64->value); 2062 + if (!window) 2063 + goto remove_property; 2064 + 2065 + window->direct = false; 2066 + 2067 + for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { 2068 + const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; 2069 + 2070 + /* Look for MMIO32 */ 2071 + if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) { 2072 + start = pci->phb->mem_resources[i].start; 2073 + end = pci->phb->mem_resources[i].end; 2074 + break; 2075 + } 2076 + } 2077 + 2078 + /* New table for using DDW instead of the default DMA window */ 2079 + iommu_table_setparms_common(tbl, pci->phb->bus->number, create.liobn, win_addr, 2080 + 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); 2081 + iommu_init_table(tbl, pci->phb->node, start, end); 2082 + 2083 + pci->table_group->tables[num] = tbl; 2084 + set_iommu_table_base(&pdev->dev, tbl); 2085 + pdev->dev.archdata.dma_offset = win_addr; 2086 + 2087 + spin_lock(&dma_win_list_lock); 2088 + 
list_add(&window->list, &dma_win_list); 2089 + spin_unlock(&dma_win_list_lock); 2090 + 2091 + mutex_unlock(&dma_win_init_mutex); 2092 + 2093 + goto exit; 2094 + 2095 + remove_property: 2096 + of_remove_property(pdn, win64); 2097 + free_property: 2098 + kfree(win64->name); 2099 + kfree(win64->value); 2100 + kfree(win64); 2101 + remove_window: 2102 + __remove_dma_window(pdn, ddw_avail, create.liobn); 2103 + 2104 + out_failed: 2105 + fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); 2106 + if (!fpdn) 2107 + goto out_unlock; 2108 + fpdn->pdn = pdn; 2109 + list_add(&fpdn->list, &failed_ddw_pdn_list); 2110 + 2111 + out_unlock: 2112 + mutex_unlock(&dma_win_init_mutex); 2113 + 2114 + return ret; 2115 + exit: 2116 + /* Allocate the userspace view */ 2117 + pseries_tce_iommu_userspace_view_alloc(tbl); 2118 + tbl->it_allocated_size = spapr_tce_get_table_size(page_shift, window_size, levels); 2119 + 1879 2120 *ptbl = iommu_tce_table_get(tbl); 2121 + 1880 2122 return 0; 2123 + } 2124 + 2125 + static bool is_default_window_table(struct iommu_table_group *table_group, struct iommu_table *tbl) 2126 + { 2127 + if (((tbl->it_size << tbl->it_page_shift) <= table_group->tce32_size) && 2128 + (tbl->it_page_shift == IOMMU_PAGE_SHIFT_4K)) 2129 + return true; 2130 + 2131 + return false; 1881 2132 } 1882 2133 1883 2134 static long spapr_tce_set_window(struct iommu_table_group *table_group, ··· 2190 1837 2191 1838 static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num) 2192 1839 { 1840 + struct pci_dev *pdev = iommu_group_get_first_pci_dev(table_group->group); 1841 + struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; 1842 + struct iommu_table *tbl = table_group->tables[num]; 1843 + struct failed_ddw_pdn *fpdn; 1844 + struct dma_win *window; 1845 + const char *win_name; 1846 + int ret = -ENODEV; 1847 + 1848 + mutex_lock(&dma_win_init_mutex); 1849 + 1850 + if ((num == 0) && is_default_window_table(table_group, tbl)) 1851 + win_name = "ibm,dma-window"; 1852 + else 
1853 + win_name = DMA64_PROPNAME; 1854 + 1855 + pdn = pci_dma_find(dn, NULL); 1856 + if (!pdn || !PCI_DN(pdn)) { /* Niether of 32s|64-bit exist! */ 1857 + dev_warn(&pdev->dev, "No dma-windows exist for the node %pOF\n", pdn); 1858 + goto out_failed; 1859 + } 1860 + 1861 + /* Dont clear the TCEs, User should have done it */ 1862 + if (remove_dma_window_named(pdn, true, win_name, false)) { 1863 + pr_err("%s: The existing DDW removal failed for node %pOF\n", __func__, pdn); 1864 + goto out_failed; /* Could not remove it either! */ 1865 + } 1866 + 1867 + if (strcmp(win_name, DMA64_PROPNAME) == 0) { 1868 + spin_lock(&dma_win_list_lock); 1869 + list_for_each_entry(window, &dma_win_list, list) { 1870 + if (window->device == pdn) { 1871 + list_del(&window->list); 1872 + kfree(window); 1873 + break; 1874 + } 1875 + } 1876 + spin_unlock(&dma_win_list_lock); 1877 + } 1878 + 1879 + iommu_tce_table_put(table_group->tables[num]); 1880 + table_group->tables[num] = NULL; 1881 + 1882 + ret = 0; 1883 + 1884 + goto out_unlock; 1885 + 1886 + out_failed: 1887 + fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); 1888 + if (!fpdn) 1889 + goto out_unlock; 1890 + fpdn->pdn = pdn; 1891 + list_add(&fpdn->list, &failed_ddw_pdn_list); 1892 + 1893 + out_unlock: 1894 + mutex_unlock(&dma_win_init_mutex); 1895 + 1896 + return ret; 1897 + } 1898 + 1899 + static long spapr_tce_take_ownership(struct iommu_table_group *table_group, struct device *dev) 1900 + { 1901 + struct iommu_table *tbl = table_group->tables[0]; 1902 + struct pci_dev *pdev = to_pci_dev(dev); 1903 + struct device_node *dn = pci_device_to_OF_node(pdev); 1904 + struct device_node *pdn; 1905 + 1906 + /* SRIOV VFs using direct map by the host driver OR multifunction devices 1907 + * where the ownership was taken on the attempt by the first function 1908 + */ 1909 + if (!tbl && (table_group->max_dynamic_windows_supported != 1)) 1910 + return 0; 1911 + 1912 + mutex_lock(&dma_win_init_mutex); 1913 + 1914 + pdn = pci_dma_find(dn, NULL); 1915 + if 
(!pdn || !PCI_DN(pdn)) { /* Niether of 32s|64-bit exist! */ 1916 + dev_warn(&pdev->dev, "No dma-windows exist for the node %pOF\n", pdn); 1917 + mutex_unlock(&dma_win_init_mutex); 1918 + return -1; 1919 + } 1920 + 1921 + /* 1922 + * Though rtas call reset-pe removes the DDW, it doesn't clear the entries on the table 1923 + * if there are any. In case of direct map, the entries will be left over, which 1924 + * is fine for PEs with 2 DMA windows where the second window is created with create-pe 1925 + * at which point the table is cleared. However, on VFs having only one DMA window, the 1926 + * default window would end up seeing the entries left over from the direct map done 1927 + * on the second window. So, remove the ddw explicitly so that clean_dma_window() 1928 + * cleans up the entries if any. 1929 + */ 1930 + if (remove_dynamic_dma_windows(pdev, pdn)) { 1931 + dev_warn(&pdev->dev, "The existing DDW removal failed for node %pOF\n", pdn); 1932 + mutex_unlock(&dma_win_init_mutex); 1933 + return -1; 1934 + } 1935 + 1936 + /* The table_group->tables[0] is not null now, it must be the default window 1937 + * Remove it, let the userspace create it as it needs. 
1938 + */ 1939 + if (table_group->tables[0]) { 1940 + remove_dma_window_named(pdn, true, "ibm,dma-window", true); 1941 + iommu_tce_table_put(tbl); 1942 + table_group->tables[0] = NULL; 1943 + } 1944 + set_iommu_table_base(dev, NULL); 1945 + 1946 + mutex_unlock(&dma_win_init_mutex); 1947 + 2193 1948 return 0; 2194 1949 } 2195 1950 2196 - static long spapr_tce_take_ownership(struct iommu_table_group *table_group) 1951 + static void spapr_tce_release_ownership(struct iommu_table_group *table_group, struct device *dev) 2197 1952 { 2198 - int i, j, rc = 0; 1953 + struct iommu_table *tbl = table_group->tables[0]; 2199 1954 2200 - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 2201 - struct iommu_table *tbl = table_group->tables[i]; 2202 - 2203 - if (!tbl || !tbl->it_map) 2204 - continue; 2205 - 2206 - rc = iommu_take_ownership(tbl); 2207 - if (!rc) 2208 - continue; 2209 - 2210 - for (j = 0; j < i; ++j) 2211 - iommu_release_ownership(table_group->tables[j]); 2212 - return rc; 1955 + if (tbl) { /* Default window already restored */ 1956 + return; 2213 1957 } 2214 - return 0; 2215 - } 2216 1958 2217 - static void spapr_tce_release_ownership(struct iommu_table_group *table_group) 2218 - { 2219 - int i; 1959 + mutex_lock(&dma_win_init_mutex); 2220 1960 2221 - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 2222 - struct iommu_table *tbl = table_group->tables[i]; 1961 + /* Restore the default window */ 1962 + pseries_setup_default_iommu_config(table_group, dev); 2223 1963 2224 - if (!tbl) 2225 - continue; 1964 + mutex_unlock(&dma_win_init_mutex); 2226 1965 2227 - if (tbl->it_map) 2228 - iommu_release_ownership(tbl); 2229 - } 1966 + return; 2230 1967 } 2231 1968 2232 1969 static struct iommu_table_group_ops spapr_tce_table_group_ops = { ··· 2389 1946 * we have to remove the property when releasing 2390 1947 * the device node. 
2391 1948 */ 2392 - if (remove_ddw(np, false, DIRECT64_PROPNAME)) 2393 - remove_ddw(np, false, DMA64_PROPNAME); 1949 + if (remove_dma_window_named(np, false, DIRECT64_PROPNAME, true)) 1950 + remove_dma_window_named(np, false, DMA64_PROPNAME, true); 2394 1951 2395 1952 if (pci && pci->table_group) 2396 1953 iommu_pseries_free_group(pci->table_group,