Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
"16 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm/devm_memremap_pages: fix final page put race
PCI/P2PDMA: track pgmap references per resource, not globally
lib/genalloc: introduce chunk owners
PCI/P2PDMA: fix the gen_pool_add_virt() failure path
mm/devm_memremap_pages: introduce devm_memunmap_pages
drivers/base/devres: introduce devm_release_action()
mm/vmscan.c: fix trying to reclaim unevictable LRU page
coredump: fix race condition between collapse_huge_page() and core dumping
mm/mlock.c: change count_mm_mlocked_page_nr return type
mm: mmu_gather: remove __tlb_reset_range() for force flush
fs/ocfs2: fix race in ocfs2_dentry_attach_lock()
mm/vmscan.c: fix recent_rotated history
mm/mlock.c: mlockall error for flag MCL_ONFAULT
scripts/decode_stacktrace.sh: prefix addr2line with $CROSS_COMPILE
mm/list_lru.c: fix memory leak in __memcg_init_list_lru_node
mm: memcontrol: don't batch updates of local VM stats and events

+311 -141
+23 -1
drivers/base/devres.c
··· 755 755 756 756 WARN_ON(devres_destroy(dev, devm_action_release, devm_action_match, 757 757 &devres)); 758 - 759 758 } 760 759 EXPORT_SYMBOL_GPL(devm_remove_action); 760 + 761 + /** 762 + * devm_release_action() - release previously added custom action 763 + * @dev: Device that owns the action 764 + * @action: Function implementing the action 765 + * @data: Pointer to data passed to @action implementation 766 + * 767 + * Releases and removes instance of @action previously added by 768 + * devm_add_action(). Both action and data should match one of the 769 + * existing entries. 770 + */ 771 + void devm_release_action(struct device *dev, void (*action)(void *), void *data) 772 + { 773 + struct action_devres devres = { 774 + .data = data, 775 + .action = action, 776 + }; 777 + 778 + WARN_ON(devres_release(dev, devm_action_release, devm_action_match, 779 + &devres)); 780 + 781 + } 782 + EXPORT_SYMBOL_GPL(devm_release_action); 761 783 762 784 /* 763 785 * Managed kmalloc/kfree
+3 -10
drivers/dax/device.c
··· 27 27 complete(&dev_dax->cmp); 28 28 } 29 29 30 - static void dev_dax_percpu_exit(void *data) 30 + static void dev_dax_percpu_exit(struct percpu_ref *ref) 31 31 { 32 - struct percpu_ref *ref = data; 33 32 struct dev_dax *dev_dax = ref_to_dev_dax(ref); 34 33 35 34 dev_dbg(&dev_dax->dev, "%s\n", __func__); ··· 465 466 if (rc) 466 467 return rc; 467 468 468 - rc = devm_add_action_or_reset(dev, dev_dax_percpu_exit, &dev_dax->ref); 469 - if (rc) 470 - return rc; 471 - 472 469 dev_dax->pgmap.ref = &dev_dax->ref; 473 470 dev_dax->pgmap.kill = dev_dax_percpu_kill; 471 + dev_dax->pgmap.cleanup = dev_dax_percpu_exit; 474 472 addr = devm_memremap_pages(dev, &dev_dax->pgmap); 475 - if (IS_ERR(addr)) { 476 - devm_remove_action(dev, dev_dax_percpu_exit, &dev_dax->ref); 477 - percpu_ref_exit(&dev_dax->ref); 473 + if (IS_ERR(addr)) 478 474 return PTR_ERR(addr); 479 - } 480 475 481 476 inode = dax_inode(dax_dev); 482 477 cdev = inode->i_cdev;
+13 -4
drivers/nvdimm/pmem.c
··· 303 303 NULL, 304 304 }; 305 305 306 - static void pmem_release_queue(void *q) 306 + static void __pmem_release_queue(struct percpu_ref *ref) 307 307 { 308 + struct request_queue *q; 309 + 310 + q = container_of(ref, typeof(*q), q_usage_counter); 308 311 blk_cleanup_queue(q); 312 + } 313 + 314 + static void pmem_release_queue(void *ref) 315 + { 316 + __pmem_release_queue(ref); 309 317 } 310 318 311 319 static void pmem_freeze_queue(struct percpu_ref *ref) ··· 407 399 if (!q) 408 400 return -ENOMEM; 409 401 410 - if (devm_add_action_or_reset(dev, pmem_release_queue, q)) 411 - return -ENOMEM; 412 - 413 402 pmem->pfn_flags = PFN_DEV; 414 403 pmem->pgmap.ref = &q->q_usage_counter; 415 404 pmem->pgmap.kill = pmem_freeze_queue; 405 + pmem->pgmap.cleanup = __pmem_release_queue; 416 406 if (is_nd_pfn(dev)) { 417 407 if (setup_pagemap_fsdax(dev, &pmem->pgmap)) 418 408 return -ENOMEM; ··· 431 425 pmem->pfn_flags |= PFN_MAP; 432 426 memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); 433 427 } else { 428 + if (devm_add_action_or_reset(dev, pmem_release_queue, 429 + &q->q_usage_counter)) 430 + return -ENOMEM; 434 431 addr = devm_memremap(dev, pmem->phys_addr, 435 432 pmem->size, ARCH_MEMREMAP_PMEM); 436 433 memcpy(&bb_res, &nsio->res, sizeof(bb_res));
+73 -44
drivers/pci/p2pdma.c
··· 20 20 #include <linux/seq_buf.h> 21 21 22 22 struct pci_p2pdma { 23 - struct percpu_ref devmap_ref; 24 - struct completion devmap_ref_done; 25 23 struct gen_pool *pool; 26 24 bool p2pmem_published; 25 + }; 26 + 27 + struct p2pdma_pagemap { 28 + struct dev_pagemap pgmap; 29 + struct percpu_ref ref; 30 + struct completion ref_done; 27 31 }; 28 32 29 33 static ssize_t size_show(struct device *dev, struct device_attribute *attr, ··· 78 74 .name = "p2pmem", 79 75 }; 80 76 77 + static struct p2pdma_pagemap *to_p2p_pgmap(struct percpu_ref *ref) 78 + { 79 + return container_of(ref, struct p2pdma_pagemap, ref); 80 + } 81 + 81 82 static void pci_p2pdma_percpu_release(struct percpu_ref *ref) 82 83 { 83 - struct pci_p2pdma *p2p = 84 - container_of(ref, struct pci_p2pdma, devmap_ref); 84 + struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref); 85 85 86 - complete_all(&p2p->devmap_ref_done); 86 + complete(&p2p_pgmap->ref_done); 87 87 } 88 88 89 89 static void pci_p2pdma_percpu_kill(struct percpu_ref *ref) 90 90 { 91 - /* 92 - * pci_p2pdma_add_resource() may be called multiple times 93 - * by a driver and may register the percpu_kill devm action multiple 94 - * times. We only want the first action to actually kill the 95 - * percpu_ref. 96 - */ 97 - if (percpu_ref_is_dying(ref)) 98 - return; 99 - 100 91 percpu_ref_kill(ref); 92 + } 93 + 94 + static void pci_p2pdma_percpu_cleanup(struct percpu_ref *ref) 95 + { 96 + struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref); 97 + 98 + wait_for_completion(&p2p_pgmap->ref_done); 99 + percpu_ref_exit(&p2p_pgmap->ref); 101 100 } 102 101 103 102 static void pci_p2pdma_release(void *data) 104 103 { 105 104 struct pci_dev *pdev = data; 105 + struct pci_p2pdma *p2pdma = pdev->p2pdma; 106 106 107 - if (!pdev->p2pdma) 107 + if (!p2pdma) 108 108 return; 109 109 110 - wait_for_completion(&pdev->p2pdma->devmap_ref_done); 111 - percpu_ref_exit(&pdev->p2pdma->devmap_ref); 112 - 113 - gen_pool_destroy(pdev->p2pdma->pool); 114 - sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group); 110 + /* Flush and disable pci_alloc_p2p_mem() */ 115 111 pdev->p2pdma = NULL; 112 + synchronize_rcu(); 113 + 114 + gen_pool_destroy(p2pdma->pool); 115 + sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group); 116 116 } 117 117 118 118 static int pci_p2pdma_setup(struct pci_dev *pdev) ··· 131 123 p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev)); 132 124 if (!p2p->pool) 133 125 goto out; 134 - 135 - init_completion(&p2p->devmap_ref_done); 136 - error = percpu_ref_init(&p2p->devmap_ref, 137 - pci_p2pdma_percpu_release, 0, GFP_KERNEL); 138 - if (error) 139 - goto out_pool_destroy; 140 126 141 127 error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); 142 128 if (error) ··· 165 163 int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, 166 164 u64 offset) 167 165 { 166 + struct p2pdma_pagemap *p2p_pgmap; 168 167 struct dev_pagemap *pgmap; 169 168 void *addr; 170 169 int error; ··· 188 185 return error; 189 186 } 190 187 191 - pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL); 192 - if (!pgmap) 188 + p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL); 189 + if (!p2p_pgmap) 193 190 return -ENOMEM; 191 + 192 + init_completion(&p2p_pgmap->ref_done); 193 + error = percpu_ref_init(&p2p_pgmap->ref, 194 + pci_p2pdma_percpu_release, 0, GFP_KERNEL); 195 + if (error) 196 + goto pgmap_free; 197 + 198 + pgmap = &p2p_pgmap->pgmap; 194 199 195 200 pgmap->res.start = pci_resource_start(pdev, bar) + offset; 196 201 pgmap->res.end = pgmap->res.start + size - 1; 197 202 pgmap->res.flags = pci_resource_flags(pdev, bar); 198 - pgmap->ref = &pdev->p2pdma->devmap_ref; 203 + pgmap->ref = &p2p_pgmap->ref; 199 204 pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; 200 205 pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) - 201 206 pci_resource_start(pdev, bar); 202 207 pgmap->kill = pci_p2pdma_percpu_kill; 208 + pgmap->cleanup = pci_p2pdma_percpu_cleanup; 203 209 204 210 addr = devm_memremap_pages(&pdev->dev, pgmap); 205 211 if (IS_ERR(addr)) { ··· 216 204 goto pgmap_free; 217 205 } 218 206 219 - error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr, 207 + error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr, 220 208 pci_bus_address(pdev, bar) + offset, 221 - resource_size(&pgmap->res), dev_to_node(&pdev->dev)); 209 + resource_size(&pgmap->res), dev_to_node(&pdev->dev), 210 + &p2p_pgmap->ref); 222 211 if (error) 223 - goto pgmap_free; 212 + goto pages_free; 224 213 225 214 pci_info(pdev, "added peer-to-peer DMA memory %pR\n", 226 215 &pgmap->res); 227 216 228 217 return 0; 229 218 219 + pages_free: 220 + devm_memunmap_pages(&pdev->dev, pgmap); 230 221 pgmap_free: 231 - devm_kfree(&pdev->dev, pgmap); 222 + devm_kfree(&pdev->dev, p2p_pgmap); 232 223 return error; 233 224 } 234 225 EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource); ··· 600 585 */ 601 586 void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size) 602 587 { 603 - void *ret; 588 + void *ret = NULL; 589 + struct percpu_ref *ref; 604 590 591 + /* 592 + * Pairs with synchronize_rcu() in pci_p2pdma_release() to 593 + * ensure pdev->p2pdma is non-NULL for the duration of the 594 + * read-lock. 595 + */ 596 + rcu_read_lock(); 605 597 if (unlikely(!pdev->p2pdma)) 606 - return NULL; 598 + goto out; 607 599 608 - if (unlikely(!percpu_ref_tryget_live(&pdev->p2pdma->devmap_ref))) 609 - return NULL; 600 + ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size, 601 + (void **) &ref); 602 + if (!ret) 603 + goto out; 610 604 611 - ret = (void *)gen_pool_alloc(pdev->p2pdma->pool, size); 612 - 613 - if (unlikely(!ret)) 614 - percpu_ref_put(&pdev->p2pdma->devmap_ref); 615 - 605 + if (unlikely(!percpu_ref_tryget_live(ref))) { 606 + gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size); 607 + ret = NULL; 608 + goto out; 609 + } 610 + out: 611 + rcu_read_unlock(); 616 612 return ret; 617 613 } 618 614 EXPORT_SYMBOL_GPL(pci_alloc_p2pmem); ··· 636 610 */ 637 611 void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size) 638 612 { 639 - gen_pool_free(pdev->p2pdma->pool, (uintptr_t)addr, size); 640 - percpu_ref_put(&pdev->p2pdma->devmap_ref); 613 + struct percpu_ref *ref; 614 + 615 + gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size, 616 + (void **) &ref); 617 + percpu_ref_put(ref); 641 618 } 642 619 EXPORT_SYMBOL_GPL(pci_free_p2pmem); 643 620
+12
fs/ocfs2/dcache.c
··· 296 296 297 297 out_attach: 298 298 spin_lock(&dentry_attach_lock); 299 + if (unlikely(dentry->d_fsdata && !alias)) { 300 + /* d_fsdata is set by a racing thread which is doing 301 + * the same thing as this thread is doing. Leave the racing 302 + * thread going ahead and we return here. 303 + */ 304 + spin_unlock(&dentry_attach_lock); 305 + iput(dl->dl_inode); 306 + ocfs2_lock_res_free(&dl->dl_lockres); 307 + kfree(dl); 308 + return 0; 309 + } 310 + 299 311 dentry->d_fsdata = dl; 300 312 dl->dl_count++; 301 313 spin_unlock(&dentry_attach_lock);
+1
include/linux/device.h
··· 713 713 /* allows to add/remove a custom action to devres stack */ 714 714 int devm_add_action(struct device *dev, void (*action)(void *), void *data); 715 715 void devm_remove_action(struct device *dev, void (*action)(void *), void *data); 716 + void devm_release_action(struct device *dev, void (*action)(void *), void *data); 716 717 717 718 static inline int devm_add_action_or_reset(struct device *dev, 718 719 void (*action)(void *), void *data)
+49 -6
include/linux/genalloc.h
··· 75 75 struct list_head next_chunk; /* next chunk in pool */ 76 76 atomic_long_t avail; 77 77 phys_addr_t phys_addr; /* physical starting address of memory chunk */ 78 + void *owner; /* private data to retrieve at alloc time */ 78 79 unsigned long start_addr; /* start address of memory chunk */ 79 80 unsigned long end_addr; /* end address of memory chunk (inclusive) */ 80 81 unsigned long bits[0]; /* bitmap for allocating memory chunk */ ··· 97 96 98 97 extern struct gen_pool *gen_pool_create(int, int); 99 98 extern phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long); 100 - extern int gen_pool_add_virt(struct gen_pool *, unsigned long, phys_addr_t, 101 - size_t, int); 99 + extern int gen_pool_add_owner(struct gen_pool *, unsigned long, phys_addr_t, 100 + size_t, int, void *); 101 + 102 + static inline int gen_pool_add_virt(struct gen_pool *pool, unsigned long addr, 103 + phys_addr_t phys, size_t size, int nid) 104 + { 105 + return gen_pool_add_owner(pool, addr, phys, size, nid, NULL); 106 + } 107 + 102 108 /** 103 109 * gen_pool_add - add a new chunk of special memory to the pool 104 110 * @pool: pool to add new memory chunk to ··· 124 116 return gen_pool_add_virt(pool, addr, -1, size, nid); 125 117 } 126 118 extern void gen_pool_destroy(struct gen_pool *); 127 - extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); 128 - extern unsigned long gen_pool_alloc_algo(struct gen_pool *, size_t, 129 - genpool_algo_t algo, void *data); 119 + unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size, 120 + genpool_algo_t algo, void *data, void **owner); 121 + 122 + static inline unsigned long gen_pool_alloc_owner(struct gen_pool *pool, 123 + size_t size, void **owner) 124 + { 125 + return gen_pool_alloc_algo_owner(pool, size, pool->algo, pool->data, 126 + owner); 127 + } 128 + 129 + static inline unsigned long gen_pool_alloc_algo(struct gen_pool *pool, 130 + size_t size, genpool_algo_t algo, void *data) 131 + { 132 + return gen_pool_alloc_algo_owner(pool, size, algo, data, NULL); 133 + } 134 + 135 + /** 136 + * gen_pool_alloc - allocate special memory from the pool 137 + * @pool: pool to allocate from 138 + * @size: number of bytes to allocate from the pool 139 + * 140 + * Allocate the requested number of bytes from the specified pool. 141 + * Uses the pool allocation function (with first-fit algorithm by default). 142 + * Can not be used in NMI handler on architectures without 143 + * NMI-safe cmpxchg implementation. 144 + */ 145 + static inline unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) 146 + { 147 + return gen_pool_alloc_algo(pool, size, pool->algo, pool->data); 148 + } 149 + 130 150 extern void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, 131 151 dma_addr_t *dma); 132 - extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); 152 + extern void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, 153 + size_t size, void **owner); 154 + static inline void gen_pool_free(struct gen_pool *pool, unsigned long addr, 155 + size_t size) 156 + { 157 + gen_pool_free_owner(pool, addr, size, NULL); 158 + } 159 + 133 160 extern void gen_pool_for_each_chunk(struct gen_pool *, 134 161 void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *); 135 162 extern size_t gen_pool_avail(struct gen_pool *);
+18 -8
include/linux/memcontrol.h
··· 117 117 struct mem_cgroup_per_node { 118 118 struct lruvec lruvec; 119 119 120 + /* Legacy local VM stats */ 121 + struct lruvec_stat __percpu *lruvec_stat_local; 122 + 123 + /* Subtree VM stats (batched updates) */ 120 124 struct lruvec_stat __percpu *lruvec_stat_cpu; 121 125 atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS]; 122 - atomic_long_t lruvec_stat_local[NR_VM_NODE_STAT_ITEMS]; 123 126 124 127 unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; 125 128 ··· 268 265 atomic_t moving_account; 269 266 struct task_struct *move_lock_task; 270 267 271 - /* memory.stat */ 268 + /* Legacy local VM stats and events */ 269 + struct memcg_vmstats_percpu __percpu *vmstats_local; 270 + 271 + /* Subtree VM stats and events (batched updates) */ 272 272 struct memcg_vmstats_percpu __percpu *vmstats_percpu; 273 273 274 274 MEMCG_PADDING(_pad2_); 275 275 276 276 atomic_long_t vmstats[MEMCG_NR_STAT]; 277 - atomic_long_t vmstats_local[MEMCG_NR_STAT]; 278 - 279 277 atomic_long_t vmevents[NR_VM_EVENT_ITEMS]; 280 - atomic_long_t vmevents_local[NR_VM_EVENT_ITEMS]; 281 278 279 + /* memory.events */ 282 280 atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; 283 281 284 282 unsigned long socket_pressure; ··· 571 567 static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg, 572 568 int idx) 573 569 { 574 - long x = atomic_long_read(&memcg->vmstats_local[idx]); 570 + long x = 0; 571 + int cpu; 572 + 573 + for_each_possible_cpu(cpu) 574 + x += per_cpu(memcg->vmstats_local->stat[idx], cpu); 575 575 #ifdef CONFIG_SMP 576 576 if (x < 0) 577 577 x = 0; ··· 649 641 enum node_stat_item idx) 650 642 { 651 643 struct mem_cgroup_per_node *pn; 652 - long x; 644 + long x = 0; 645 + int cpu; 653 646 654 647 if (mem_cgroup_disabled()) 655 648 return node_page_state(lruvec_pgdat(lruvec), idx); 656 649 657 650 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); 658 - x = atomic_long_read(&pn->lruvec_stat_local[idx]); 651 + for_each_possible_cpu(cpu) 652 + x += per_cpu(pn->lruvec_stat_local->count[idx], cpu); 659 653 #ifdef CONFIG_SMP 660 654 if (x < 0) 661 655 x = 0;
+8
include/linux/memremap.h
··· 81 81 * @res: physical address range covered by @ref 82 82 * @ref: reference count that pins the devm_memremap_pages() mapping 83 83 * @kill: callback to transition @ref to the dead state 84 + * @cleanup: callback to wait for @ref to be idle and reap it 84 85 * @dev: host device of the mapping for debug 85 86 * @data: private data pointer for page_free() 86 87 * @type: memory type: see MEMORY_* in memory_hotplug.h ··· 93 92 struct resource res; 94 93 struct percpu_ref *ref; 95 94 void (*kill)(struct percpu_ref *ref); 95 + void (*cleanup)(struct percpu_ref *ref); 96 96 struct device *dev; 97 97 void *data; 98 98 enum memory_type type; ··· 102 100 103 101 #ifdef CONFIG_ZONE_DEVICE 104 102 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); 103 + void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); 105 104 struct dev_pagemap *get_dev_pagemap(unsigned long pfn, 106 105 struct dev_pagemap *pgmap); 107 106 ··· 119 116 */ 120 117 WARN_ON_ONCE(1); 121 118 return ERR_PTR(-ENXIO); 119 + } 120 + 121 + static inline void devm_memunmap_pages(struct device *dev, 122 + struct dev_pagemap *pgmap) 123 + { 122 124 } 123 125 124 126 static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+4
include/linux/sched/mm.h
··· 54 54 * followed by taking the mmap_sem for writing before modifying the 55 55 * vmas or anything the coredump pretends not to change from under it. 56 56 * 57 + * It also has to be called when mmgrab() is used in the context of 58 + * the process, but then the mm_count refcount is transferred outside 59 + * the context of the process to run down_write() on that pinned mm. 60 + * 57 61 * NOTE: find_extend_vma() called from GUP context is the only place 58 62 * that can modify the "mm" (notably the vm_start/end) under mmap_sem 59 63 * for reading and outside the context of the process, so it is also
+18 -5
kernel/memremap.c
··· 95 95 pgmap->kill(pgmap->ref); 96 96 for_each_device_pfn(pfn, pgmap) 97 97 put_page(pfn_to_page(pfn)); 98 + pgmap->cleanup(pgmap->ref); 98 99 99 100 /* pages are dead and unused, undo the arch mapping */ 100 101 align_start = res->start & ~(SECTION_SIZE - 1); ··· 134 133 * 2/ The altmap field may optionally be initialized, in which case altmap_valid 135 134 * must be set to true 136 135 * 137 - * 3/ pgmap->ref must be 'live' on entry and will be killed at 138 - * devm_memremap_pages_release() time, or if this routine fails. 136 + * 3/ pgmap->ref must be 'live' on entry and will be killed and reaped 137 + * at devm_memremap_pages_release() time, or if this routine fails. 139 138 * 140 139 * 4/ res is expected to be a host memory range that could feasibly be 141 140 * treated as a "System RAM" range, i.e. not a device mmio range, but ··· 157 156 pgprot_t pgprot = PAGE_KERNEL; 158 157 int error, nid, is_ram; 159 158 160 - if (!pgmap->ref || !pgmap->kill) 159 + if (!pgmap->ref || !pgmap->kill || !pgmap->cleanup) { 160 + WARN(1, "Missing reference count teardown definition\n"); 161 161 return ERR_PTR(-EINVAL); 162 + } 162 163 163 164 align_start = res->start & ~(SECTION_SIZE - 1); 164 165 align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) ··· 171 168 if (conflict_pgmap) { 172 169 dev_WARN(dev, "Conflicting mapping in same section\n"); 173 170 put_dev_pagemap(conflict_pgmap); 174 - return ERR_PTR(-ENOMEM); 171 + error = -ENOMEM; 172 + goto err_array; 175 173 } 176 174 177 175 conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_end), NULL); 178 176 if (conflict_pgmap) { 179 177 dev_WARN(dev, "Conflicting mapping in same section\n"); 180 178 put_dev_pagemap(conflict_pgmap); 181 - return ERR_PTR(-ENOMEM); 179 + error = -ENOMEM; 180 + goto err_array; 182 181 } 183 182 184 183 is_ram = region_intersects(align_start, align_size, ··· 272 267 pgmap_array_delete(res); 273 268 err_array: 274 269 pgmap->kill(pgmap->ref); 270 + pgmap->cleanup(pgmap->ref); 271 + 275 272 return ERR_PTR(error); 276 273 } 277 274 EXPORT_SYMBOL_GPL(devm_memremap_pages); 275 + 276 + void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap) 277 + { 278 + devm_release_action(dev, devm_memremap_pages_release, pgmap); 279 + } 280 + EXPORT_SYMBOL_GPL(devm_memunmap_pages); 278 281 279 282 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) 280 283 {
+25 -26
lib/genalloc.c
··· 168 168 EXPORT_SYMBOL(gen_pool_create); 169 169 170 170 /** 171 - * gen_pool_add_virt - add a new chunk of special memory to the pool 171 + * gen_pool_add_owner- add a new chunk of special memory to the pool 172 172 * @pool: pool to add new memory chunk to 173 173 * @virt: virtual starting address of memory chunk to add to pool 174 174 * @phys: physical starting address of memory chunk to add to pool 175 175 * @size: size in bytes of the memory chunk to add to pool 176 176 * @nid: node id of the node the chunk structure and bitmap should be 177 177 * allocated on, or -1 178 + * @owner: private data the publisher would like to recall at alloc time 178 179 * 179 180 * Add a new chunk of special memory to the specified pool. 180 181 * 181 182 * Returns 0 on success or a -ve errno on failure. 182 183 */ 183 - int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys, 184 - size_t size, int nid) 184 + int gen_pool_add_owner(struct gen_pool *pool, unsigned long virt, phys_addr_t phys, 185 + size_t size, int nid, void *owner) 185 186 { 186 187 struct gen_pool_chunk *chunk; 187 188 int nbits = size >> pool->min_alloc_order; ··· 196 195 chunk->phys_addr = phys; 197 196 chunk->start_addr = virt; 198 197 chunk->end_addr = virt + size - 1; 198 + chunk->owner = owner; 199 199 atomic_long_set(&chunk->avail, size); 200 200 201 201 spin_lock(&pool->lock); ··· 205 203 206 204 return 0; 207 205 } 208 - EXPORT_SYMBOL(gen_pool_add_virt); 206 + EXPORT_SYMBOL(gen_pool_add_owner); 209 207 210 208 /** 211 209 * gen_pool_virt_to_phys - return the physical address of memory ··· 262 260 EXPORT_SYMBOL(gen_pool_destroy); 263 261 264 262 /** 265 - * gen_pool_alloc - allocate special memory from the pool 266 - * @pool: pool to allocate from 267 - * @size: number of bytes to allocate from the pool 268 - * 269 - * Allocate the requested number of bytes from the specified pool. 270 - * Uses the pool allocation function (with first-fit algorithm by default). 271 - * Can not be used in NMI handler on architectures without 272 - * NMI-safe cmpxchg implementation. 273 - */ 274 - unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) 275 - { 276 - return gen_pool_alloc_algo(pool, size, pool->algo, pool->data); 277 - } 278 - EXPORT_SYMBOL(gen_pool_alloc); 279 - 280 - /** 281 - * gen_pool_alloc_algo - allocate special memory from the pool 263 + * gen_pool_alloc_algo_owner - allocate special memory from the pool 282 264 * @pool: pool to allocate from 283 265 * @size: number of bytes to allocate from the pool 284 266 * @algo: algorithm passed from caller 285 267 * @data: data passed to algorithm 268 + * @owner: optionally retrieve the chunk owner 286 269 * 287 270 * Allocate the requested number of bytes from the specified pool. 288 271 * Uses the pool allocation function (with first-fit algorithm by default). 289 272 * Can not be used in NMI handler on architectures without 290 273 * NMI-safe cmpxchg implementation. 291 274 */ 292 - unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, 293 - genpool_algo_t algo, void *data) 275 + unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size, 276 + genpool_algo_t algo, void *data, void **owner) 294 277 { 295 278 struct gen_pool_chunk *chunk; 296 279 unsigned long addr = 0; ··· 285 298 #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG 286 299 BUG_ON(in_nmi()); 287 300 #endif 301 + 302 + if (owner) 303 + *owner = NULL; 288 304 289 305 if (size == 0) 290 306 return 0; ··· 316 326 addr = chunk->start_addr + ((unsigned long)start_bit << order); 317 327 size = nbits << order; 318 328 atomic_long_sub(size, &chunk->avail); 329 + if (owner) 330 + *owner = chunk->owner; 319 331 break; 320 332 } 321 333 rcu_read_unlock(); 322 334 return addr; 323 335 } 324 - EXPORT_SYMBOL(gen_pool_alloc_algo); 336 + EXPORT_SYMBOL(gen_pool_alloc_algo_owner); 325 337 326 338 /** 327 339 * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage ··· 359 367 * @pool: pool to free to 360 368 * @addr: starting address of memory to free back to pool 361 369 * @size: size in bytes of memory to free 370 + * @owner: private data stashed at gen_pool_add() time 362 371 * 363 372 * Free previously allocated special memory back to the specified 364 373 * pool. Can not be used in NMI handler on architectures without 365 374 * NMI-safe cmpxchg implementation. 366 375 */ 367 - void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) 376 + void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, size_t size, 377 + void **owner) 368 378 { 369 379 struct gen_pool_chunk *chunk; 370 380 int order = pool->min_alloc_order; ··· 375 381 #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG 376 382 BUG_ON(in_nmi()); 377 383 #endif 384 + 385 + if (owner) 386 + *owner = NULL; 378 387 379 388 nbits = (size + (1UL << order) - 1) >> order; 380 389 rcu_read_lock(); ··· 389 392 BUG_ON(remain); 390 393 size = nbits << order; 391 394 atomic_long_add(size, &chunk->avail); 395 + if (owner) 396 + *owner = chunk->owner; 392 397 rcu_read_unlock(); 393 398 return; 394 399 } ··· 398 399 rcu_read_unlock(); 399 400 BUG(); 400 401 } 401 - EXPORT_SYMBOL(gen_pool_free); 402 + EXPORT_SYMBOL(gen_pool_free_owner); 402 403 403 404 /** 404 405 * gen_pool_for_each_chunk - call func for every chunk of generic memory pool
+3 -11
mm/hmm.c
··· 1354 1354 complete(&devmem->completion); 1355 1355 } 1356 1356 1357 - static void hmm_devmem_ref_exit(void *data) 1357 + static void hmm_devmem_ref_exit(struct percpu_ref *ref) 1358 1358 { 1359 - struct percpu_ref *ref = data; 1360 1359 struct hmm_devmem *devmem; 1361 1360 1362 1361 devmem = container_of(ref, struct hmm_devmem, ref); ··· 1432 1433 if (ret) 1433 1434 return ERR_PTR(ret); 1434 1435 1435 - ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit, &devmem->ref); 1436 - if (ret) 1437 - return ERR_PTR(ret); 1438 - 1439 1436 size = ALIGN(size, PA_SECTION_SIZE); 1440 1437 addr = min((unsigned long)iomem_resource.end, 1441 1438 (1UL << MAX_PHYSMEM_BITS) - 1); ··· 1470 1475 devmem->pagemap.ref = &devmem->ref; 1471 1476 devmem->pagemap.data = devmem; 1472 1477 devmem->pagemap.kill = hmm_devmem_ref_kill; 1478 + devmem->pagemap.cleanup = hmm_devmem_ref_exit; 1473 1479 1474 1480 result = devm_memremap_pages(devmem->device, &devmem->pagemap); 1475 1481 if (IS_ERR(result)) ··· 1508 1512 if (ret) 1509 1513 return ERR_PTR(ret); 1510 1514 1511 - ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit, 1512 - &devmem->ref); 1513 - if (ret) 1514 - return ERR_PTR(ret); 1515 - 1516 1515 devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT; 1517 1516 devmem->pfn_last = devmem->pfn_first + 1518 1517 (resource_size(devmem->resource) >> PAGE_SHIFT); ··· 1520 1529 devmem->pagemap.ref = &devmem->ref; 1521 1530 devmem->pagemap.data = devmem; 1522 1531 devmem->pagemap.kill = hmm_devmem_ref_kill; 1532 + devmem->pagemap.cleanup = hmm_devmem_ref_exit; 1523 1533 1524 1534 result = devm_memremap_pages(devmem->device, &devmem->pagemap); 1525 1535 if (IS_ERR(result))
+3
mm/khugepaged.c
··· 1004 1004 * handled by the anon_vma lock + PG_lock. 1005 1005 */ 1006 1006 down_write(&mm->mmap_sem); 1007 + result = SCAN_ANY_PROCESS; 1008 + if (!mmget_still_valid(mm)) 1009 + goto out; 1007 1010 result = hugepage_vma_revalidate(mm, address, &vma); 1008 1011 if (result) 1009 1012 goto out;
+1 -1
mm/list_lru.c
··· 354 354 } 355 355 return 0; 356 356 fail: 357 - __memcg_destroy_list_lru_node(memcg_lrus, begin, i - 1); 357 + __memcg_destroy_list_lru_node(memcg_lrus, begin, i); 358 358 return -ENOMEM; 359 359 } 360 360
+28 -13
mm/memcontrol.c
··· 691 691 if (mem_cgroup_disabled()) 692 692 return; 693 693 694 + __this_cpu_add(memcg->vmstats_local->stat[idx], val); 695 + 694 696 x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]); 695 697 if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { 696 698 struct mem_cgroup *mi; 697 699 698 - atomic_long_add(x, &memcg->vmstats_local[idx]); 699 700 for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) 700 701 atomic_long_add(x, &mi->vmstats[idx]); 701 702 x = 0; ··· 746 745 __mod_memcg_state(memcg, idx, val); 747 746 748 747 /* Update lruvec */ 748 + __this_cpu_add(pn->lruvec_stat_local->count[idx], val); 749 + 749 750 x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]); 750 751 if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { 751 752 struct mem_cgroup_per_node *pi; 752 753 753 - atomic_long_add(x, &pn->lruvec_stat_local[idx]); 754 754 for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id)) 755 755 atomic_long_add(x, &pi->lruvec_stat[idx]); 756 756 x = 0; ··· 773 771 if (mem_cgroup_disabled()) 774 772 return; 775 773 774 + __this_cpu_add(memcg->vmstats_local->events[idx], count); 775 + 776 776 x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]); 777 777 if (unlikely(x > MEMCG_CHARGE_BATCH)) { 778 778 struct mem_cgroup *mi; 779 779 780 - atomic_long_add(x, &memcg->vmevents_local[idx]); 781 780 for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) 782 781 atomic_long_add(x, &mi->vmevents[idx]); 783 782 x = 0; ··· 793 790 794 791 static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event) 795 792 { 796 - return atomic_long_read(&memcg->vmevents_local[event]); 793 + long x = 0; 794 + int cpu; 795 + 796 + for_each_possible_cpu(cpu) 797 + x += per_cpu(memcg->vmstats_local->events[event], cpu); 798 + return x; 797 799 } 798 800 799 801 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, ··· 2199 2191 long x; 2200 2192 2201 2193 x = this_cpu_xchg(memcg->vmstats_percpu->stat[i], 0); 2202 - if (x) { 2203 - atomic_long_add(x, &memcg->vmstats_local[i]); 2194 + if (x) 2204 2195 for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) 2205 2196 atomic_long_add(x, &memcg->vmstats[i]); 2206 - } 2207 2197 2208 2198 if (i >= NR_VM_NODE_STAT_ITEMS) 2209 2199 continue; ··· 2211 2205 2212 2206 pn = mem_cgroup_nodeinfo(memcg, nid); 2213 2207 x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0); 2214 - if (x) { 2215 - atomic_long_add(x, &pn->lruvec_stat_local[i]); 2208 + if (x) 2216 2209 do { 2217 2210 atomic_long_add(x, &pn->lruvec_stat[i]); 2218 2211 } while ((pn = parent_nodeinfo(pn, nid))); 2219 - } 2220 2212 } 2221 2213 } 2222 2214 ··· 2222 2218 long x; 2223 2219 2224 2220 x = this_cpu_xchg(memcg->vmstats_percpu->events[i], 0); 2225 - if (x) { 2226 - atomic_long_add(x, &memcg->vmevents_local[i]); 2221 + if (x) 2227 2222 for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) 2228 2223 atomic_long_add(x, &memcg->vmevents[i]); 2229 - } 2230 2224 } 2231 2225 } 2232 2226 ··· 4485 4483 if (!pn) 4486 4484 return 1; 4487 4485 4486 + pn->lruvec_stat_local = alloc_percpu(struct lruvec_stat); 4487 + if (!pn->lruvec_stat_local) { 4488 + kfree(pn); 4489 + return 1; 4490 + } 4491 + 4488 4492 pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat); 4489 4493 if (!pn->lruvec_stat_cpu) { 4494 + free_percpu(pn->lruvec_stat_local); 4490 4495 kfree(pn); 4491 4496 return 1; 4492 4497 } ··· 4515 4506 return; 4516 4507 4517 4508 free_percpu(pn->lruvec_stat_cpu); 4509 + free_percpu(pn->lruvec_stat_local); 4518 4510 kfree(pn); 4519 4511 } 4520 4512 ··· 4526 4516 for_each_node(node) 4527 4517 free_mem_cgroup_per_node_info(memcg, node); 4528 4518 free_percpu(memcg->vmstats_percpu); 4519 + free_percpu(memcg->vmstats_local); 4529 4520 kfree(memcg); 4530 4521 } 4531 4522 ··· 4553 4542 1, MEM_CGROUP_ID_MAX, 4554 4543 GFP_KERNEL); 4555 4544 if (memcg->id.id < 0) 4545 + goto fail; 4546 + 4547 + memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu); 4548 + if (!memcg->vmstats_local) 4556 4549 goto fail; 4557 4550 4558 4551 memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu);
+4 -3
mm/mlock.c
··· 636 636 * is also counted. 637 637 * Return value: previously mlocked page counts 638 638 */ 639 - static int count_mm_mlocked_page_nr(struct mm_struct *mm, 639 + static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm, 640 640 unsigned long start, size_t len) 641 641 { 642 642 struct vm_area_struct *vma; 643 - int count = 0; 643 + unsigned long count = 0; 644 644 645 645 if (mm == NULL) 646 646 mm = current->mm; ··· 797 797 unsigned long lock_limit; 798 798 int ret; 799 799 800 - if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT))) 800 + if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) || 801 + flags == MCL_ONFAULT) 801 802 return -EINVAL; 802 803 803 804 if (!can_do_mlock())
+19 -5
mm/mmu_gather.c
··· 245 245 { 246 246 /* 247 247 * If there are parallel threads are doing PTE changes on same range 248 - * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB 249 - * flush by batching, a thread has stable TLB entry can fail to flush 250 - * the TLB by observing pte_none|!pte_dirty, for example so flush TLB 251 - * forcefully if we detect parallel PTE batching threads. 248 + * under non-exclusive lock (e.g., mmap_sem read-side) but defer TLB 249 + * flush by batching, one thread may end up seeing inconsistent PTEs 250 + * and result in having stale TLB entries. So flush TLB forcefully 251 + * if we detect parallel PTE batching threads. 252 + * 253 + * However, some syscalls, e.g. munmap(), may free page tables, this 254 + * needs force flush everything in the given range. Otherwise this 255 + * may result in having stale TLB entries for some architectures, 256 + * e.g. aarch64, that could specify flush what level TLB. 252 257 */ 253 258 if (mm_tlb_flush_nested(tlb->mm)) { 259 + /* 260 + * The aarch64 yields better performance with fullmm by 261 + * avoiding multiple CPUs spamming TLBI messages at the 262 + * same time. 263 + * 264 + * On x86 non-fullmm doesn't yield significant difference 265 + * against fullmm. 266 + */ 267 + tlb->fullmm = 1; 254 268 __tlb_reset_range(tlb); 255 - __tlb_adjust_range(tlb, start, end - start); 269 + tlb->freed_tables = 1; 256 270 } 257 271 258 272 tlb_flush_mmu(tlb);
+3 -3
mm/vmscan.c
··· 1505 1505 1506 1506 list_for_each_entry_safe(page, next, page_list, lru) { 1507 1507 if (page_is_file_cache(page) && !PageDirty(page) && 1508 - !__PageMovable(page)) { 1508 + !__PageMovable(page) && !PageUnevictable(page)) { 1509 1509 ClearPageActive(page); 1510 1510 list_move(&page->lru, &clean_pages); 1511 1511 } ··· 1953 1953 if (global_reclaim(sc)) 1954 1954 __count_vm_events(item, nr_reclaimed); 1955 1955 __count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed); 1956 - reclaim_stat->recent_rotated[0] = stat.nr_activate[0]; 1957 - reclaim_stat->recent_rotated[1] = stat.nr_activate[1]; 1956 + reclaim_stat->recent_rotated[0] += stat.nr_activate[0]; 1957 + reclaim_stat->recent_rotated[1] += stat.nr_activate[1]; 1958 1958 1959 1959 move_pages_to_lru(lruvec, &page_list); 1960 1960
+1 -1
scripts/decode_stacktrace.sh
··· 73 73 if [[ "${cache[$module,$address]+isset}" == "isset" ]]; then 74 74 local code=${cache[$module,$address]} 75 75 else 76 - local code=$(addr2line -i -e "$objfile" "$address") 76 + local code=$(${CROSS_COMPILE}addr2line -i -e "$objfile" "$address") 77 77 cache[$module,$address]=$code 78 78 fi 79 79
+2
tools/testing/nvdimm/test/iomap.c
··· 100 100 { 101 101 struct dev_pagemap *pgmap = _pgmap; 102 102 103 + WARN_ON(!pgmap || !pgmap->ref || !pgmap->kill || !pgmap->cleanup); 103 104 pgmap->kill(pgmap->ref); 105 + pgmap->cleanup(pgmap->ref); 104 106 } 105 107 106 108 void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)