Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm/memremap_pages: support multiple ranges per invocation

In support of device-dax growing the ability to front physically
discontiguous ranges of memory, update devm_memremap_pages() to track
multiple ranges with a single reference counter and devm instance.

Convert all [devm_]memremap_pages() users to specify the number of ranges
they are mapping in their 'struct dev_pagemap' instance.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brice Goglin <Brice.Goglin@inria.fr>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hulk Robot <hulkci@huawei.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Jason Yan <yanaijie@huawei.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: Jia He <justin.he@arm.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lkml.kernel.org/r/159643103789.4062302.18426128170217903785.stgit@dwillia2-desk3.amr.corp.intel.com
Link: https://lkml.kernel.org/r/160106116293.30709.13350662794915396198.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Dan Williams and committed by
Linus Torvalds
b7b3c01b a4574f63

+174 -118
+1
arch/powerpc/kvm/book3s_hv_uvmem.c
··· 1172 1172 kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE; 1173 1173 kvmppc_uvmem_pgmap.range.start = res->start; 1174 1174 kvmppc_uvmem_pgmap.range.end = res->end; 1175 + kvmppc_uvmem_pgmap.nr_range = 1; 1175 1176 kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops; 1176 1177 /* just one global instance: */ 1177 1178 kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap;
+1
drivers/dax/device.c
··· 417 417 if (!pgmap) 418 418 return -ENOMEM; 419 419 pgmap->range = *range; 420 + pgmap->nr_range = 1; 420 421 } 421 422 pgmap->type = MEMORY_DEVICE_GENERIC; 422 423 addr = devm_memremap_pages(dev, pgmap);
+1
drivers/gpu/drm/nouveau/nouveau_dmem.c
··· 251 251 chunk->pagemap.type = MEMORY_DEVICE_PRIVATE; 252 252 chunk->pagemap.range.start = res->start; 253 253 chunk->pagemap.range.end = res->end; 254 + chunk->pagemap.nr_range = 1; 254 255 chunk->pagemap.ops = &nouveau_dmem_pagemap_ops; 255 256 chunk->pagemap.owner = drm->dev; 256 257
+1
drivers/nvdimm/pfn_devs.c
··· 693 693 .start = nsio->res.start + start_pad, 694 694 .end = nsio->res.end - end_trunc, 695 695 }; 696 + pgmap->nr_range = 1; 696 697 if (nd_pfn->mode == PFN_MODE_RAM) { 697 698 if (offset < reserve) 698 699 return -EINVAL;
+1
drivers/nvdimm/pmem.c
··· 441 441 } else if (pmem_should_map_pages(dev)) { 442 442 pmem->pgmap.range.start = res->start; 443 443 pmem->pgmap.range.end = res->end; 444 + pmem->pgmap.nr_range = 1; 444 445 pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; 445 446 pmem->pgmap.ops = &fsdax_pagemap_ops; 446 447 addr = devm_memremap_pages(dev, &pmem->pgmap);
+1
drivers/pci/p2pdma.c
··· 187 187 pgmap = &p2p_pgmap->pgmap; 188 188 pgmap->range.start = pci_resource_start(pdev, bar) + offset; 189 189 pgmap->range.end = pgmap->range.start + size - 1; 190 + pgmap->nr_range = 1; 190 191 pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; 191 192 192 193 p2p_pgmap->provider = pdev;
+1
drivers/xen/unpopulated-alloc.c
··· 47 47 .start = res->start, 48 48 .end = res->end, 49 49 }; 50 + pgmap->nr_range = 1; 50 51 pgmap->owner = res; 51 52 52 53 #ifdef CONFIG_XEN_HAVE_PVMMU
+8 -2
include/linux/memremap.h
··· 94 94 /** 95 95 * struct dev_pagemap - metadata for ZONE_DEVICE mappings 96 96 * @altmap: pre-allocated/reserved memory for vmemmap allocations 97 - * @range: physical address range covered by @ref 98 97 * @ref: reference count that pins the devm_memremap_pages() mapping 99 98 * @internal_ref: internal reference if @ref is not provided by the caller 100 99 * @done: completion for @internal_ref ··· 103 104 * @owner: an opaque pointer identifying the entity that manages this 104 105 * instance. Used by various helpers to make sure that no 105 106 * foreign ZONE_DEVICE memory is accessed. 107 + * @nr_range: number of ranges to be mapped 108 + * @range: range to be mapped when nr_range == 1 109 + * @ranges: array of ranges to be mapped when nr_range > 1 106 110 */ 107 111 struct dev_pagemap { 108 112 struct vmem_altmap altmap; 109 - struct range range; 110 113 struct percpu_ref *ref; 111 114 struct percpu_ref internal_ref; 112 115 struct completion done; ··· 116 115 unsigned int flags; 117 116 const struct dev_pagemap_ops *ops; 118 117 void *owner; 118 + int nr_range; 119 + union { 120 + struct range range; 121 + struct range ranges[0]; 122 + }; 119 123 }; 120 124 121 125 static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
+1
lib/test_hmm.c
··· 472 472 devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; 473 473 devmem->pagemap.range.start = res->start; 474 474 devmem->pagemap.range.end = res->end; 475 + devmem->pagemap.nr_range = 1; 475 476 devmem->pagemap.ops = &dmirror_devmem_ops; 476 477 devmem->pagemap.owner = mdevice; 477 478
+158 -116
mm/memremap.c
··· 77 77 synchronize_rcu(); 78 78 } 79 79 80 - static unsigned long pfn_first(struct dev_pagemap *pgmap) 80 + static unsigned long pfn_first(struct dev_pagemap *pgmap, int range_id) 81 81 { 82 - return PHYS_PFN(pgmap->range.start) + 83 - vmem_altmap_offset(pgmap_altmap(pgmap)); 82 + struct range *range = &pgmap->ranges[range_id]; 83 + unsigned long pfn = PHYS_PFN(range->start); 84 + 85 + if (range_id) 86 + return pfn; 87 + return pfn + vmem_altmap_offset(pgmap_altmap(pgmap)); 84 88 } 85 89 86 - static unsigned long pfn_end(struct dev_pagemap *pgmap) 90 + static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id) 87 91 { 88 - const struct range *range = &pgmap->range; 92 + const struct range *range = &pgmap->ranges[range_id]; 89 93 90 94 return (range->start + range_len(range)) >> PAGE_SHIFT; 91 95 } ··· 101 97 return pfn + 1; 102 98 } 103 99 104 - #define for_each_device_pfn(pfn, map) \ 105 - for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn)) 100 + #define for_each_device_pfn(pfn, map, i) \ 101 + for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn)) 106 102 107 103 static void dev_pagemap_kill(struct dev_pagemap *pgmap) 108 104 { ··· 128 124 pgmap->ref = NULL; 129 125 } 130 126 131 - void memunmap_pages(struct dev_pagemap *pgmap) 127 + static void pageunmap_range(struct dev_pagemap *pgmap, int range_id) 132 128 { 133 - struct range *range = &pgmap->range; 129 + struct range *range = &pgmap->ranges[range_id]; 134 130 struct page *first_page; 135 - unsigned long pfn; 136 131 int nid; 137 132 138 - dev_pagemap_kill(pgmap); 139 - for_each_device_pfn(pfn, pgmap) 140 - put_page(pfn_to_page(pfn)); 141 - dev_pagemap_cleanup(pgmap); 142 - 143 133 /* make sure to access a memmap that was actually initialized */ 144 - first_page = pfn_to_page(pfn_first(pgmap)); 134 + first_page = pfn_to_page(pfn_first(pgmap, range_id)); 145 135 146 136 /* pages are dead and unused, undo the arch mapping */ 147 137 nid = page_to_nid(first_page); 
··· 155 157 156 158 untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range)); 157 159 pgmap_array_delete(range); 160 + } 161 + 162 + void memunmap_pages(struct dev_pagemap *pgmap) 163 + { 164 + unsigned long pfn; 165 + int i; 166 + 167 + dev_pagemap_kill(pgmap); 168 + for (i = 0; i < pgmap->nr_range; i++) 169 + for_each_device_pfn(pfn, pgmap, i) 170 + put_page(pfn_to_page(pfn)); 171 + dev_pagemap_cleanup(pgmap); 172 + 173 + for (i = 0; i < pgmap->nr_range; i++) 174 + pageunmap_range(pgmap, i); 175 + 158 176 WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n"); 159 177 devmap_managed_enable_put(); 160 178 } ··· 189 175 complete(&pgmap->done); 190 176 } 191 177 178 + static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params, 179 + int range_id, int nid) 180 + { 181 + struct range *range = &pgmap->ranges[range_id]; 182 + struct dev_pagemap *conflict_pgmap; 183 + int error, is_ram; 184 + 185 + if (WARN_ONCE(pgmap_altmap(pgmap) && range_id > 0, 186 + "altmap not supported for multiple ranges\n")) 187 + return -EINVAL; 188 + 189 + conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start), NULL); 190 + if (conflict_pgmap) { 191 + WARN(1, "Conflicting mapping in same section\n"); 192 + put_dev_pagemap(conflict_pgmap); 193 + return -ENOMEM; 194 + } 195 + 196 + conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end), NULL); 197 + if (conflict_pgmap) { 198 + WARN(1, "Conflicting mapping in same section\n"); 199 + put_dev_pagemap(conflict_pgmap); 200 + return -ENOMEM; 201 + } 202 + 203 + is_ram = region_intersects(range->start, range_len(range), 204 + IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); 205 + 206 + if (is_ram != REGION_DISJOINT) { 207 + WARN_ONCE(1, "attempted on %s region %#llx-%#llx\n", 208 + is_ram == REGION_MIXED ? 
"mixed" : "ram", 209 + range->start, range->end); 210 + return -ENXIO; 211 + } 212 + 213 + error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(range->start), 214 + PHYS_PFN(range->end), pgmap, GFP_KERNEL)); 215 + if (error) 216 + return error; 217 + 218 + if (nid < 0) 219 + nid = numa_mem_id(); 220 + 221 + error = track_pfn_remap(NULL, &params->pgprot, PHYS_PFN(range->start), 0, 222 + range_len(range)); 223 + if (error) 224 + goto err_pfn_remap; 225 + 226 + mem_hotplug_begin(); 227 + 228 + /* 229 + * For device private memory we call add_pages() as we only need to 230 + * allocate and initialize struct page for the device memory. More- 231 + * over the device memory is un-accessible thus we do not want to 232 + * create a linear mapping for the memory like arch_add_memory() 233 + * would do. 234 + * 235 + * For all other device memory types, which are accessible by 236 + * the CPU, we do want the linear mapping and thus use 237 + * arch_add_memory(). 238 + */ 239 + if (pgmap->type == MEMORY_DEVICE_PRIVATE) { 240 + error = add_pages(nid, PHYS_PFN(range->start), 241 + PHYS_PFN(range_len(range)), params); 242 + } else { 243 + error = kasan_add_zero_shadow(__va(range->start), range_len(range)); 244 + if (error) { 245 + mem_hotplug_done(); 246 + goto err_kasan; 247 + } 248 + 249 + error = arch_add_memory(nid, range->start, range_len(range), 250 + params); 251 + } 252 + 253 + if (!error) { 254 + struct zone *zone; 255 + 256 + zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE]; 257 + move_pfn_range_to_zone(zone, PHYS_PFN(range->start), 258 + PHYS_PFN(range_len(range)), params->altmap); 259 + } 260 + 261 + mem_hotplug_done(); 262 + if (error) 263 + goto err_add_memory; 264 + 265 + /* 266 + * Initialization of the pages has been deferred until now in order 267 + * to allow us to do the work while not holding the hotplug lock. 
268 + */ 269 + memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], 270 + PHYS_PFN(range->start), 271 + PHYS_PFN(range_len(range)), pgmap); 272 + percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id) 273 + - pfn_first(pgmap, range_id)); 274 + return 0; 275 + 276 + err_add_memory: 277 + kasan_remove_zero_shadow(__va(range->start), range_len(range)); 278 + err_kasan: 279 + untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range)); 280 + err_pfn_remap: 281 + pgmap_array_delete(range); 282 + return error; 283 + } 284 + 285 + 192 286 /* 193 287 * Not device managed version of dev_memremap_pages, undone by 194 288 * memunmap_pages(). Please use dev_memremap_pages if you have a struct ··· 304 182 */ 305 183 void *memremap_pages(struct dev_pagemap *pgmap, int nid) 306 184 { 307 - struct range *range = &pgmap->range; 308 - struct dev_pagemap *conflict_pgmap; 309 185 struct mhp_params params = { 310 - /* 311 - * We do not want any optional features only our own memmap 312 - */ 313 186 .altmap = pgmap_altmap(pgmap), 314 187 .pgprot = PAGE_KERNEL, 315 188 }; 316 - int error, is_ram; 189 + const int nr_range = pgmap->nr_range; 317 190 bool need_devmap_managed = true; 191 + int error, i; 192 + 193 + if (WARN_ONCE(!nr_range, "nr_range must be specified\n")) 194 + return ERR_PTR(-EINVAL); 318 195 319 196 switch (pgmap->type) { 320 197 case MEMORY_DEVICE_PRIVATE: ··· 372 251 return ERR_PTR(error); 373 252 } 374 253 375 - conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start), NULL); 376 - if (conflict_pgmap) { 377 - WARN(1, "Conflicting mapping in same section\n"); 378 - put_dev_pagemap(conflict_pgmap); 379 - error = -ENOMEM; 380 - goto err_array; 381 - } 382 - 383 - conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end), NULL); 384 - if (conflict_pgmap) { 385 - WARN(1, "Conflicting mapping in same section\n"); 386 - put_dev_pagemap(conflict_pgmap); 387 - error = -ENOMEM; 388 - goto err_array; 389 - } 390 - 391 - is_ram = region_intersects(range->start, 
range_len(range), 392 - IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); 393 - 394 - if (is_ram != REGION_DISJOINT) { 395 - WARN_ONCE(1, "attempted on %s region %#llx-%#llx\n", 396 - is_ram == REGION_MIXED ? "mixed" : "ram", 397 - range->start, range->end); 398 - error = -ENXIO; 399 - goto err_array; 400 - } 401 - 402 - error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(range->start), 403 - PHYS_PFN(range->end), pgmap, GFP_KERNEL)); 404 - if (error) 405 - goto err_array; 406 - 407 - if (nid < 0) 408 - nid = numa_mem_id(); 409 - 410 - error = track_pfn_remap(NULL, &params.pgprot, PHYS_PFN(range->start), 0, 411 - range_len(range)); 412 - if (error) 413 - goto err_pfn_remap; 414 - 415 - mem_hotplug_begin(); 416 - 417 254 /* 418 - * For device private memory we call add_pages() as we only need to 419 - * allocate and initialize struct page for the device memory. More- 420 - * over the device memory is un-accessible thus we do not want to 421 - * create a linear mapping for the memory like arch_add_memory() 422 - * would do. 423 - * 424 - * For all other device memory types, which are accessible by 425 - * the CPU, we do want the linear mapping and thus use 426 - * arch_add_memory(). 255 + * Clear the pgmap nr_range as it will be incremented for each 256 + * successfully processed range. This communicates how many 257 + * regions to unwind in the abort case. 
427 258 */ 428 - if (pgmap->type == MEMORY_DEVICE_PRIVATE) { 429 - error = add_pages(nid, PHYS_PFN(range->start), 430 - PHYS_PFN(range_len(range)), &params); 431 - } else { 432 - error = kasan_add_zero_shadow(__va(range->start), range_len(range)); 433 - if (error) { 434 - mem_hotplug_done(); 435 - goto err_kasan; 436 - } 437 - 438 - error = arch_add_memory(nid, range->start, range_len(range), 439 - &params); 259 + pgmap->nr_range = 0; 260 + error = 0; 261 + for (i = 0; i < nr_range; i++) { 262 + error = pagemap_range(pgmap, &params, i, nid); 263 + if (error) 264 + break; 265 + pgmap->nr_range++; 440 266 } 441 267 442 - if (!error) { 443 - struct zone *zone; 444 - 445 - zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE]; 446 - move_pfn_range_to_zone(zone, PHYS_PFN(range->start), 447 - PHYS_PFN(range_len(range)), params.altmap); 268 + if (i < nr_range) { 269 + memunmap_pages(pgmap); 270 + pgmap->nr_range = nr_range; 271 + return ERR_PTR(error); 448 272 } 449 273 450 - mem_hotplug_done(); 451 - if (error) 452 - goto err_add_memory; 453 - 454 - /* 455 - * Initialization of the pages has been deferred until now in order 456 - * to allow us to do the work while not holding the hotplug lock. 457 - */ 458 - memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], 459 - PHYS_PFN(range->start), 460 - PHYS_PFN(range_len(range)), pgmap); 461 - percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap)); 462 - return __va(range->start); 463 - 464 - err_add_memory: 465 - kasan_remove_zero_shadow(__va(range->start), range_len(range)); 466 - err_kasan: 467 - untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range)); 468 - err_pfn_remap: 469 - pgmap_array_delete(range); 470 - err_array: 471 - dev_pagemap_kill(pgmap); 472 - dev_pagemap_cleanup(pgmap); 473 - devmap_managed_enable_put(); 474 - return ERR_PTR(error); 274 + return __va(pgmap->ranges[0].start); 475 275 } 476 276 EXPORT_SYMBOL_GPL(memremap_pages); 477 277