Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'anonvma'

* anonvma:
anonvma: when setting up page->mapping, we need to pick the _oldest_ anonvma
anon_vma: clone the anon_vma chain in the right order
vma_adjust: fix the copying of anon_vma chains
Simplify and comment on anon_vma re-use for anon_vma_prepare()

+84 -43
+70 -40
mm/mmap.c
··· (mm/mmap.c, vma_adjust: track the exporting vma instead of a bare anon_vma pointer)
 	struct address_space *mapping = NULL;
 	struct prio_tree_root *root = NULL;
 	struct file *file = vma->vm_file;
-	struct anon_vma *anon_vma = NULL;
 	long adjust_next = 0;
 	int remove_next = 0;
 
 	if (next && !insert) {
+		struct vm_area_struct *exporter = NULL;
+
 		if (end >= next->vm_end) {
 			/*
 			 * vma expands, overlapping all the next, and
···
 			 */
again:			remove_next = 1 + (end > next->vm_end);
 			end = next->vm_end;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end > next->vm_start) {
 			/*
···
 			 * mprotect case 5 shifting the boundary up.
 			 */
 			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end < vma->vm_end) {
 			/*
···
 			 * mprotect case 4 shifting the boundary down.
 			 */
 			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
-			anon_vma = next->anon_vma;
+			exporter = vma;
 			importer = next;
 		}
-	}
 
-	/*
-	 * When changing only vma->vm_end, we don't really need anon_vma lock.
-	 */
-	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-		anon_vma = vma->anon_vma;
-	if (anon_vma) {
 		/*
 		 * Easily overlooked: when mprotect shifts the boundary,
 		 * make sure the expanding vma has anon_vma set if the
 		 * shrinking vma had, to cover any anon pages imported.
 		 */
-		if (importer && !importer->anon_vma) {
-			/* Block reverse map lookups until things are set up. */
-			if (anon_vma_clone(importer, vma)) {
+		if (exporter && exporter->anon_vma && !importer->anon_vma) {
+			if (anon_vma_clone(importer, exporter))
 				return -ENOMEM;
-			}
-			importer->anon_vma = anon_vma;
+			importer->anon_vma = exporter->anon_vma;
 		}
 	}
 
··· (mm/mmap.c: new helpers anon_vma_compatible() and reusable_anon_vma())
 }
 
 /*
+ * Rough compatibility check to quickly see if it's even worth looking
+ * at sharing an anon_vma.
+ *
+ * They need to have the same vm_file, and the flags can only differ
+ * in things that mprotect may change.
+ *
+ * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
+ * we can merge the two vma's. For example, we refuse to merge a vma if
+ * there is a vm_ops->close() function, because that indicates that the
+ * driver is doing some kind of reference counting. But that doesn't
+ * really matter for the anon_vma sharing case.
+ */
+static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	return a->vm_end == b->vm_start &&
+		mpol_equal(vma_policy(a), vma_policy(b)) &&
+		a->vm_file == b->vm_file &&
+		!((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
+}
+
+/*
+ * Do some basic sanity checking to see if we can re-use the anon_vma
+ * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
+ * the same as 'old', the other will be the new one that is trying
+ * to share the anon_vma.
+ *
+ * NOTE! This runs with mm_sem held for reading, so it is possible that
+ * the anon_vma of 'old' is concurrently in the process of being set up
+ * by another page fault trying to merge _that_. But that's ok: if it
+ * is being set up, that automatically means that it will be a singleton
+ * acceptable for merging, so we can do all of this optimistically. But
+ * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
+ *
+ * IOW: that the "list_is_singular()" test on the anon_vma_chain only
+ * matters for the 'stable anon_vma' case (ie the thing we want to avoid
+ * is to return an anon_vma that is "complex" due to having gone through
+ * a fork).
+ *
+ * We also make sure that the two vma's are compatible (adjacent,
+ * and with the same memory policies). That's all stable, even with just
+ * a read lock on the mm_sem.
+ */
+static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	if (anon_vma_compatible(a, b)) {
+		struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
+
+		if (anon_vma && list_is_singular(&old->anon_vma_chain))
+			return anon_vma;
+	}
+	return NULL;
+}
+
+/*
 * find_mergeable_anon_vma is used by anon_vma_prepare, to check
 * neighbouring vmas for a suitable anon_vma, before it goes off
 * to allocate a new anon_vma.  It checks because a repetitive
···
··· (mm/mmap.c, find_mergeable_anon_vma: use reusable_anon_vma() for both neighbours)
 */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
+	struct anon_vma *anon_vma;
 	struct vm_area_struct *near;
-	unsigned long vm_flags;
 
 	near = vma->vm_next;
 	if (!near)
 		goto try_prev;
 
-	/*
-	 * Since only mprotect tries to remerge vmas, match flags
-	 * which might be mprotected into each other later on.
-	 * Neither mlock nor madvise tries to remerge at present,
-	 * so leave their flags as obstructing a merge.
-	 */
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && vma->vm_end == near->vm_start &&
-			mpol_equal(vma_policy(vma), vma_policy(near)) &&
-			can_vma_merge_before(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff +
-				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, vma, near);
+	if (anon_vma)
+		return anon_vma;
try_prev:
 	/*
 	 * It is potentially slow to have to call find_vma_prev here.
···
 	if (!near)
 		goto none;
 
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && near->vm_end == vma->vm_start &&
-			mpol_equal(vma_policy(near), vma_policy(vma)) &&
-			can_vma_merge_after(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, near, vma);
+	if (anon_vma)
+		return anon_vma;
none:
 	/*
 	 * There's no absolute need to look only at touching neighbours:
+14 -3
mm/rmap.c
··· (mm/rmap.c, anon_vma_clone: clone the anon_vma chain in the right order)
 {
 	struct anon_vma_chain *avc, *pavc;
 
-	list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
 		avc = anon_vma_chain_alloc();
 		if (!avc)
 			goto enomem_failure;
··· (mm/rmap.c, __page_set_anon_rmap: pick the _oldest_ anon_vma for page->mapping)
 static void __page_set_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc;
+	struct anon_vma *anon_vma;
 
-	BUG_ON(!anon_vma);
+	BUG_ON(!vma->anon_vma);
+
+	/*
+	 * We must use the _oldest_ possible anon_vma for the page mapping!
+	 *
+	 * So take the last AVC chain entry in the vma, which is the deepest
+	 * ancestor, and use the anon_vma from that.
+	 */
+	avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
+	anon_vma = avc->anon_vma;
+
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	page->mapping = (struct address_space *) anon_vma;
 	page->index = linear_page_index(vma, address);