Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux

Pull SLAB changes from Pekka Enberg:
"The patches from Joonsoo Kim switch mm/slab.c to use 'struct page' for
slab internals similar to mm/slub.c. This reduces memory usage and
improves performance:

https://lkml.org/lkml/2013/10/16/155

The rest of the changes are bug fixes from various people."

* 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux: (21 commits)
mm, slub: fix the typo in mm/slub.c
mm, slub: fix the typo in include/linux/slub_def.h
slub: Handle NULL parameter in kmem_cache_flags
slab: replace non-existing 'struct freelist *' with 'void *'
slab: fix to calm down kmemleak warning
slub: proper kmemleak tracking if CONFIG_SLUB_DEBUG disabled
slab: rename slab_bufctl to slab_freelist
slab: remove useless statement for checking pfmemalloc
slab: use struct page for slab management
slab: replace free and inuse in struct slab with newly introduced active
slab: remove SLAB_LIMIT
slab: remove kmem_bufctl_t
slab: change the management method of free objects of the slab
slab: use __GFP_COMP flag for allocating slab pages
slab: use well-defined macro, virt_to_slab()
slab: overloading the RCU head over the LRU for RCU free
slab: remove cachep in struct slab_rcu
slab: remove nodeid in struct slab
slab: remove colouroff in struct slab
slab: change return type of kmem_getpages() to struct page
...

+281 -376
+16 -8
include/linux/mm_types.h
··· 44 44 /* First double word block */ 45 45 unsigned long flags; /* Atomic flags, some possibly 46 46 * updated asynchronously */ 47 - struct address_space *mapping; /* If low bit clear, points to 48 - * inode address_space, or NULL. 49 - * If page mapped as anonymous 50 - * memory, low bit is set, and 51 - * it points to anon_vma object: 52 - * see PAGE_MAPPING_ANON below. 53 - */ 47 + union { 48 + struct address_space *mapping; /* If low bit clear, points to 49 + * inode address_space, or NULL. 50 + * If page mapped as anonymous 51 + * memory, low bit is set, and 52 + * it points to anon_vma object: 53 + * see PAGE_MAPPING_ANON below. 54 + */ 55 + void *s_mem; /* slab first object */ 56 + }; 57 + 54 58 /* Second double word */ 55 59 struct { 56 60 union { 57 61 pgoff_t index; /* Our offset within mapping. */ 58 - void *freelist; /* slub/slob first free object */ 62 + void *freelist; /* sl[aou]b first free object */ 59 63 bool pfmemalloc; /* If set by the page allocator, 60 64 * ALLOC_NO_WATERMARKS was set 61 65 * and the low watermark was not ··· 115 111 }; 116 112 atomic_t _count; /* Usage count, see below. */ 117 113 }; 114 + unsigned int active; /* SLAB */ 118 115 }; 119 116 }; 120 117 ··· 137 132 138 133 struct list_head list; /* slobs list of pages */ 139 134 struct slab *slab_page; /* slab fields */ 135 + struct rcu_head rcu_head; /* Used by SLAB 136 + * when destroying via RCU 137 + */ 140 138 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS 141 139 pgtable_t pmd_huge_pte; /* protected by page->ptl */ 142 140 #endif
+8 -1
include/linux/slab.h
··· 53 53 * } 54 54 * rcu_read_unlock(); 55 55 * 56 - * See also the comment on struct slab_rcu in mm/slab.c. 56 + * This is useful if we need to approach a kernel structure obliquely, 57 + * from its address obtained without the usual locking. We can lock 58 + * the structure to stabilize it and check it's still at the given address, 59 + * only if we can be sure that the memory has not been meanwhile reused 60 + * for some other kind of object (which our subsystem's lock might corrupt). 61 + * 62 + * rcu_read_lock before reading the address, then rcu_read_unlock after 63 + * taking the spinlock within the structure expected at that address. 57 64 */ 58 65 #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ 59 66 #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */
+2 -2
include/linux/slab_def.h
··· 27 27 28 28 size_t colour; /* cache colouring range */ 29 29 unsigned int colour_off; /* colour offset */ 30 - struct kmem_cache *slabp_cache; 31 - unsigned int slab_size; 30 + struct kmem_cache *freelist_cache; 31 + unsigned int freelist_size; 32 32 33 33 /* constructor func */ 34 34 void (*ctor)(void *obj);
+1 -1
include/linux/slub_def.h
··· 11 11 enum stat_item { 12 12 ALLOC_FASTPATH, /* Allocation from cpu slab */ 13 13 ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ 14 - FREE_FASTPATH, /* Free to cpu slub */ 14 + FREE_FASTPATH, /* Free to cpu slab */ 15 15 FREE_SLOWPATH, /* Freeing not to cpu slab */ 16 16 FREE_FROZEN, /* Freeing to frozen slab */ 17 17 FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
+218 -355
mm/slab.c
··· 164 164 static bool pfmemalloc_active __read_mostly; 165 165 166 166 /* 167 - * kmem_bufctl_t: 168 - * 169 - * Bufctl's are used for linking objs within a slab 170 - * linked offsets. 171 - * 172 - * This implementation relies on "struct page" for locating the cache & 173 - * slab an object belongs to. 174 - * This allows the bufctl structure to be small (one int), but limits 175 - * the number of objects a slab (not a cache) can contain when off-slab 176 - * bufctls are used. The limit is the size of the largest general cache 177 - * that does not use off-slab slabs. 178 - * For 32bit archs with 4 kB pages, is this 56. 179 - * This is not serious, as it is only for large objects, when it is unwise 180 - * to have too many per slab. 181 - * Note: This limit can be raised by introducing a general cache whose size 182 - * is less than 512 (PAGE_SIZE<<3), but greater than 256. 183 - */ 184 - 185 - typedef unsigned int kmem_bufctl_t; 186 - #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) 187 - #define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) 188 - #define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) 189 - #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) 190 - 191 - /* 192 - * struct slab_rcu 193 - * 194 - * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to 195 - * arrange for kmem_freepages to be called via RCU. This is useful if 196 - * we need to approach a kernel structure obliquely, from its address 197 - * obtained without the usual locking. We can lock the structure to 198 - * stabilize it and check it's still at the given address, only if we 199 - * can be sure that the memory has not been meanwhile reused for some 200 - * other kind of object (which our subsystem's lock might corrupt). 201 - * 202 - * rcu_read_lock before reading the address, then rcu_read_unlock after 203 - * taking the spinlock within the structure expected at that address. 
204 - */ 205 - struct slab_rcu { 206 - struct rcu_head head; 207 - struct kmem_cache *cachep; 208 - void *addr; 209 - }; 210 - 211 - /* 212 - * struct slab 213 - * 214 - * Manages the objs in a slab. Placed either at the beginning of mem allocated 215 - * for a slab, or allocated from an general cache. 216 - * Slabs are chained into three list: fully used, partial, fully free slabs. 217 - */ 218 - struct slab { 219 - union { 220 - struct { 221 - struct list_head list; 222 - unsigned long colouroff; 223 - void *s_mem; /* including colour offset */ 224 - unsigned int inuse; /* num of objs active in slab */ 225 - kmem_bufctl_t free; 226 - unsigned short nodeid; 227 - }; 228 - struct slab_rcu __slab_cover_slab_rcu; 229 - }; 230 - }; 231 - 232 - /* 233 167 * struct array_cache 234 168 * 235 169 * Purpose: ··· 390 456 return page->slab_cache; 391 457 } 392 458 393 - static inline struct slab *virt_to_slab(const void *obj) 394 - { 395 - struct page *page = virt_to_head_page(obj); 396 - 397 - VM_BUG_ON(!PageSlab(page)); 398 - return page->slab_page; 399 - } 400 - 401 - static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, 459 + static inline void *index_to_obj(struct kmem_cache *cache, struct page *page, 402 460 unsigned int idx) 403 461 { 404 - return slab->s_mem + cache->size * idx; 462 + return page->s_mem + cache->size * idx; 405 463 } 406 464 407 465 /* ··· 403 477 * reciprocal_divide(offset, cache->reciprocal_buffer_size) 404 478 */ 405 479 static inline unsigned int obj_to_index(const struct kmem_cache *cache, 406 - const struct slab *slab, void *obj) 480 + const struct page *page, void *obj) 407 481 { 408 - u32 offset = (obj - slab->s_mem); 482 + u32 offset = (obj - page->s_mem); 409 483 return reciprocal_divide(offset, cache->reciprocal_buffer_size); 410 484 } 411 485 ··· 567 641 568 642 static size_t slab_mgmt_size(size_t nr_objs, size_t align) 569 643 { 570 - return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); 644 + 
return ALIGN(nr_objs * sizeof(unsigned int), align); 571 645 } 572 646 573 647 /* ··· 586 660 * on it. For the latter case, the memory allocated for a 587 661 * slab is used for: 588 662 * 589 - * - The struct slab 590 - * - One kmem_bufctl_t for each object 663 + * - One unsigned int for each object 591 664 * - Padding to respect alignment of @align 592 665 * - @buffer_size bytes for each object 593 666 * ··· 599 674 mgmt_size = 0; 600 675 nr_objs = slab_size / buffer_size; 601 676 602 - if (nr_objs > SLAB_LIMIT) 603 - nr_objs = SLAB_LIMIT; 604 677 } else { 605 678 /* 606 679 * Ignore padding for the initial guess. The padding ··· 608 685 * into the memory allocation when taking the padding 609 686 * into account. 610 687 */ 611 - nr_objs = (slab_size - sizeof(struct slab)) / 612 - (buffer_size + sizeof(kmem_bufctl_t)); 688 + nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int)); 613 689 614 690 /* 615 691 * This calculated number will be either the right ··· 617 695 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size 618 696 > slab_size) 619 697 nr_objs--; 620 - 621 - if (nr_objs > SLAB_LIMIT) 622 - nr_objs = SLAB_LIMIT; 623 698 624 699 mgmt_size = slab_mgmt_size(nr_objs, align); 625 700 } ··· 748 829 return nc; 749 830 } 750 831 751 - static inline bool is_slab_pfmemalloc(struct slab *slabp) 832 + static inline bool is_slab_pfmemalloc(struct page *page) 752 833 { 753 - struct page *page = virt_to_page(slabp->s_mem); 754 - 755 834 return PageSlabPfmemalloc(page); 756 835 } 757 836 ··· 758 841 struct array_cache *ac) 759 842 { 760 843 struct kmem_cache_node *n = cachep->node[numa_mem_id()]; 761 - struct slab *slabp; 844 + struct page *page; 762 845 unsigned long flags; 763 846 764 847 if (!pfmemalloc_active) 765 848 return; 766 849 767 850 spin_lock_irqsave(&n->list_lock, flags); 768 - list_for_each_entry(slabp, &n->slabs_full, list) 769 - if (is_slab_pfmemalloc(slabp)) 851 + list_for_each_entry(page, &n->slabs_full, lru) 852 + if 
(is_slab_pfmemalloc(page)) 770 853 goto out; 771 854 772 - list_for_each_entry(slabp, &n->slabs_partial, list) 773 - if (is_slab_pfmemalloc(slabp)) 855 + list_for_each_entry(page, &n->slabs_partial, lru) 856 + if (is_slab_pfmemalloc(page)) 774 857 goto out; 775 858 776 - list_for_each_entry(slabp, &n->slabs_free, list) 777 - if (is_slab_pfmemalloc(slabp)) 859 + list_for_each_entry(page, &n->slabs_free, lru) 860 + if (is_slab_pfmemalloc(page)) 778 861 goto out; 779 862 780 863 pfmemalloc_active = false; ··· 814 897 */ 815 898 n = cachep->node[numa_mem_id()]; 816 899 if (!list_empty(&n->slabs_free) && force_refill) { 817 - struct slab *slabp = virt_to_slab(objp); 818 - ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); 900 + struct page *page = virt_to_head_page(objp); 901 + ClearPageSlabPfmemalloc(page); 819 902 clear_obj_pfmemalloc(&objp); 820 903 recheck_pfmemalloc_active(cachep, ac); 821 904 return objp; ··· 1016 1099 1017 1100 static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 1018 1101 { 1019 - struct slab *slabp = virt_to_slab(objp); 1020 - int nodeid = slabp->nodeid; 1102 + int nodeid = page_to_nid(virt_to_page(objp)); 1021 1103 struct kmem_cache_node *n; 1022 1104 struct array_cache *alien = NULL; 1023 1105 int node; ··· 1027 1111 * Make sure we are not freeing a object from another node to the array 1028 1112 * cache on this cpu. 
1029 1113 */ 1030 - if (likely(slabp->nodeid == node)) 1114 + if (likely(nodeid == node)) 1031 1115 return 0; 1032 1116 1033 1117 n = cachep->node[node]; ··· 1428 1512 { 1429 1513 int i; 1430 1514 1515 + BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) < 1516 + sizeof(struct rcu_head)); 1431 1517 kmem_cache = &kmem_cache_boot; 1432 1518 setup_node_pointer(kmem_cache); 1433 1519 ··· 1605 1687 slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) 1606 1688 { 1607 1689 struct kmem_cache_node *n; 1608 - struct slab *slabp; 1690 + struct page *page; 1609 1691 unsigned long flags; 1610 1692 int node; 1611 1693 ··· 1624 1706 continue; 1625 1707 1626 1708 spin_lock_irqsave(&n->list_lock, flags); 1627 - list_for_each_entry(slabp, &n->slabs_full, list) { 1709 + list_for_each_entry(page, &n->slabs_full, lru) { 1628 1710 active_objs += cachep->num; 1629 1711 active_slabs++; 1630 1712 } 1631 - list_for_each_entry(slabp, &n->slabs_partial, list) { 1632 - active_objs += slabp->inuse; 1713 + list_for_each_entry(page, &n->slabs_partial, lru) { 1714 + active_objs += page->active; 1633 1715 active_slabs++; 1634 1716 } 1635 - list_for_each_entry(slabp, &n->slabs_free, list) 1717 + list_for_each_entry(page, &n->slabs_free, lru) 1636 1718 num_slabs++; 1637 1719 1638 1720 free_objects += n->free_objects; ··· 1654 1736 * did not request dmaable memory, we might get it, but that 1655 1737 * would be relatively rare and ignorable. 
1656 1738 */ 1657 - static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) 1739 + static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, 1740 + int nodeid) 1658 1741 { 1659 1742 struct page *page; 1660 1743 int nr_pages; 1661 - int i; 1662 - 1663 - #ifndef CONFIG_MMU 1664 - /* 1665 - * Nommu uses slab's for process anonymous memory allocations, and thus 1666 - * requires __GFP_COMP to properly refcount higher order allocations 1667 - */ 1668 - flags |= __GFP_COMP; 1669 - #endif 1670 1744 1671 1745 flags |= cachep->allocflags; 1672 1746 if (cachep->flags & SLAB_RECLAIM_ACCOUNT) ··· 1682 1772 else 1683 1773 add_zone_page_state(page_zone(page), 1684 1774 NR_SLAB_UNRECLAIMABLE, nr_pages); 1685 - for (i = 0; i < nr_pages; i++) { 1686 - __SetPageSlab(page + i); 1687 - 1688 - if (page->pfmemalloc) 1689 - SetPageSlabPfmemalloc(page + i); 1690 - } 1775 + __SetPageSlab(page); 1776 + if (page->pfmemalloc) 1777 + SetPageSlabPfmemalloc(page); 1691 1778 memcg_bind_pages(cachep, cachep->gfporder); 1692 1779 1693 1780 if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { ··· 1696 1789 kmemcheck_mark_unallocated_pages(page, nr_pages); 1697 1790 } 1698 1791 1699 - return page_address(page); 1792 + return page; 1700 1793 } 1701 1794 1702 1795 /* 1703 1796 * Interface to system's page release. 
1704 1797 */ 1705 - static void kmem_freepages(struct kmem_cache *cachep, void *addr) 1798 + static void kmem_freepages(struct kmem_cache *cachep, struct page *page) 1706 1799 { 1707 - unsigned long i = (1 << cachep->gfporder); 1708 - struct page *page = virt_to_page(addr); 1709 - const unsigned long nr_freed = i; 1800 + const unsigned long nr_freed = (1 << cachep->gfporder); 1710 1801 1711 1802 kmemcheck_free_shadow(page, cachep->gfporder); 1712 1803 ··· 1714 1809 else 1715 1810 sub_zone_page_state(page_zone(page), 1716 1811 NR_SLAB_UNRECLAIMABLE, nr_freed); 1717 - while (i--) { 1718 - BUG_ON(!PageSlab(page)); 1719 - __ClearPageSlabPfmemalloc(page); 1720 - __ClearPageSlab(page); 1721 - page++; 1722 - } 1812 + 1813 + BUG_ON(!PageSlab(page)); 1814 + __ClearPageSlabPfmemalloc(page); 1815 + __ClearPageSlab(page); 1816 + page_mapcount_reset(page); 1817 + page->mapping = NULL; 1723 1818 1724 1819 memcg_release_pages(cachep, cachep->gfporder); 1725 1820 if (current->reclaim_state) 1726 1821 current->reclaim_state->reclaimed_slab += nr_freed; 1727 - free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder); 1822 + __free_memcg_kmem_pages(page, cachep->gfporder); 1728 1823 } 1729 1824 1730 1825 static void kmem_rcu_free(struct rcu_head *head) 1731 1826 { 1732 - struct slab_rcu *slab_rcu = (struct slab_rcu *)head; 1733 - struct kmem_cache *cachep = slab_rcu->cachep; 1827 + struct kmem_cache *cachep; 1828 + struct page *page; 1734 1829 1735 - kmem_freepages(cachep, slab_rcu->addr); 1736 - if (OFF_SLAB(cachep)) 1737 - kmem_cache_free(cachep->slabp_cache, slab_rcu); 1830 + page = container_of(head, struct page, rcu_head); 1831 + cachep = page->slab_cache; 1832 + 1833 + kmem_freepages(cachep, page); 1738 1834 } 1739 1835 1740 1836 #if DEBUG ··· 1884 1978 /* Print some data about the neighboring objects, if they 1885 1979 * exist: 1886 1980 */ 1887 - struct slab *slabp = virt_to_slab(objp); 1981 + struct page *page = virt_to_head_page(objp); 1888 1982 unsigned int objnr; 1889 
1983 1890 - objnr = obj_to_index(cachep, slabp, objp); 1984 + objnr = obj_to_index(cachep, page, objp); 1891 1985 if (objnr) { 1892 - objp = index_to_obj(cachep, slabp, objnr - 1); 1986 + objp = index_to_obj(cachep, page, objnr - 1); 1893 1987 realobj = (char *)objp + obj_offset(cachep); 1894 1988 printk(KERN_ERR "Prev obj: start=%p, len=%d\n", 1895 1989 realobj, size); 1896 1990 print_objinfo(cachep, objp, 2); 1897 1991 } 1898 1992 if (objnr + 1 < cachep->num) { 1899 - objp = index_to_obj(cachep, slabp, objnr + 1); 1993 + objp = index_to_obj(cachep, page, objnr + 1); 1900 1994 realobj = (char *)objp + obj_offset(cachep); 1901 1995 printk(KERN_ERR "Next obj: start=%p, len=%d\n", 1902 1996 realobj, size); ··· 1907 2001 #endif 1908 2002 1909 2003 #if DEBUG 1910 - static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) 2004 + static void slab_destroy_debugcheck(struct kmem_cache *cachep, 2005 + struct page *page) 1911 2006 { 1912 2007 int i; 1913 2008 for (i = 0; i < cachep->num; i++) { 1914 - void *objp = index_to_obj(cachep, slabp, i); 2009 + void *objp = index_to_obj(cachep, page, i); 1915 2010 1916 2011 if (cachep->flags & SLAB_POISON) { 1917 2012 #ifdef CONFIG_DEBUG_PAGEALLOC ··· 1937 2030 } 1938 2031 } 1939 2032 #else 1940 - static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) 2033 + static void slab_destroy_debugcheck(struct kmem_cache *cachep, 2034 + struct page *page) 1941 2035 { 1942 2036 } 1943 2037 #endif ··· 1952 2044 * Before calling the slab must have been unlinked from the cache. The 1953 2045 * cache-lock is not held/needed. 
1954 2046 */ 1955 - static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) 2047 + static void slab_destroy(struct kmem_cache *cachep, struct page *page) 1956 2048 { 1957 - void *addr = slabp->s_mem - slabp->colouroff; 2049 + void *freelist; 1958 2050 1959 - slab_destroy_debugcheck(cachep, slabp); 2051 + freelist = page->freelist; 2052 + slab_destroy_debugcheck(cachep, page); 1960 2053 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { 1961 - struct slab_rcu *slab_rcu; 2054 + struct rcu_head *head; 1962 2055 1963 - slab_rcu = (struct slab_rcu *)slabp; 1964 - slab_rcu->cachep = cachep; 1965 - slab_rcu->addr = addr; 1966 - call_rcu(&slab_rcu->head, kmem_rcu_free); 2056 + /* 2057 + * RCU free overloads the RCU head over the LRU. 2058 + * slab_page has been overloaded over the LRU, 2059 + * however it is not used from now on so that 2060 + * we can use it safely. 2061 + */ 2062 + head = (void *)&page->rcu_head; 2063 + call_rcu(head, kmem_rcu_free); 2064 + 1967 2065 } else { 1968 - kmem_freepages(cachep, addr); 1969 - if (OFF_SLAB(cachep)) 1970 - kmem_cache_free(cachep->slabp_cache, slabp); 2066 + kmem_freepages(cachep, page); 1971 2067 } 2068 + 2069 + /* 2070 + * From now on, we don't use freelist 2071 + * although actual page can be freed in rcu context 2072 + */ 2073 + if (OFF_SLAB(cachep)) 2074 + kmem_cache_free(cachep->freelist_cache, freelist); 1972 2075 } 1973 2076 1974 2077 /** ··· 2016 2097 * use off-slab slabs. Needed to avoid a possible 2017 2098 * looping condition in cache_grow().
2018 2099 */ 2019 - offslab_limit = size - sizeof(struct slab); 2020 - offslab_limit /= sizeof(kmem_bufctl_t); 2100 + offslab_limit = size; 2101 + offslab_limit /= sizeof(unsigned int); 2021 2102 2022 2103 if (num > offslab_limit) 2023 2104 break; ··· 2139 2220 int 2140 2221 __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) 2141 2222 { 2142 - size_t left_over, slab_size, ralign; 2223 + size_t left_over, freelist_size, ralign; 2143 2224 gfp_t gfp; 2144 2225 int err; 2145 2226 size_t size = cachep->size; ··· 2258 2339 if (!cachep->num) 2259 2340 return -E2BIG; 2260 2341 2261 - slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) 2262 - + sizeof(struct slab), cachep->align); 2342 + freelist_size = 2343 + ALIGN(cachep->num * sizeof(unsigned int), cachep->align); 2263 2344 2264 2345 /* 2265 2346 * If the slab has been placed off-slab, and we have enough space then 2266 2347 * move it on-slab. This is at the expense of any extra colouring. 2267 2348 */ 2268 - if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { 2349 + if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) { 2269 2350 flags &= ~CFLGS_OFF_SLAB; 2270 - left_over -= slab_size; 2351 + left_over -= freelist_size; 2271 2352 } 2272 2353 2273 2354 if (flags & CFLGS_OFF_SLAB) { 2274 2355 /* really off slab. 
No need for manual alignment */ 2275 - slab_size = 2276 - cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); 2356 + freelist_size = cachep->num * sizeof(unsigned int); 2277 2357 2278 2358 #ifdef CONFIG_PAGE_POISONING 2279 2359 /* If we're going to use the generic kernel_map_pages() ··· 2289 2371 if (cachep->colour_off < cachep->align) 2290 2372 cachep->colour_off = cachep->align; 2291 2373 cachep->colour = left_over / cachep->colour_off; 2292 - cachep->slab_size = slab_size; 2374 + cachep->freelist_size = freelist_size; 2293 2375 cachep->flags = flags; 2294 - cachep->allocflags = 0; 2376 + cachep->allocflags = __GFP_COMP; 2295 2377 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) 2296 2378 cachep->allocflags |= GFP_DMA; 2297 2379 cachep->size = size; 2298 2380 cachep->reciprocal_buffer_size = reciprocal_value(size); 2299 2381 2300 2382 if (flags & CFLGS_OFF_SLAB) { 2301 - cachep->slabp_cache = kmalloc_slab(slab_size, 0u); 2383 + cachep->freelist_cache = kmalloc_slab(freelist_size, 0u); 2302 2384 /* 2303 2385 * This is a possibility for one of the malloc_sizes caches. 2304 2386 * But since we go off slab only for object size greater than ··· 2306 2388 * this should not happen at all. 2307 2389 * But leave a BUG_ON for some lucky dude. 
2308 2390 */ 2309 - BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache)); 2391 + BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache)); 2310 2392 } 2311 2393 2312 2394 err = setup_cpu_cache(cachep, gfp); ··· 2412 2494 { 2413 2495 struct list_head *p; 2414 2496 int nr_freed; 2415 - struct slab *slabp; 2497 + struct page *page; 2416 2498 2417 2499 nr_freed = 0; 2418 2500 while (nr_freed < tofree && !list_empty(&n->slabs_free)) { ··· 2424 2506 goto out; 2425 2507 } 2426 2508 2427 - slabp = list_entry(p, struct slab, list); 2509 + page = list_entry(p, struct page, lru); 2428 2510 #if DEBUG 2429 - BUG_ON(slabp->inuse); 2511 + BUG_ON(page->active); 2430 2512 #endif 2431 - list_del(&slabp->list); 2513 + list_del(&page->lru); 2432 2514 /* 2433 2515 * Safe to drop the lock. The slab is no longer linked 2434 2516 * to the cache. 2435 2517 */ 2436 2518 n->free_objects -= cache->num; 2437 2519 spin_unlock_irq(&n->list_lock); 2438 - slab_destroy(cache, slabp); 2520 + slab_destroy(cache, page); 2439 2521 nr_freed++; 2440 2522 } 2441 2523 out: ··· 2518 2600 * descriptors in kmem_cache_create, we search through the malloc_sizes array. 2519 2601 * If we are creating a malloc_sizes cache here it would not be visible to 2520 2602 * kmem_find_general_cachep till the initialization is complete. 2521 - * Hence we cannot have slabp_cache same as the original cache. 2603 + * Hence we cannot have freelist_cache same as the original cache. 2522 2604 */ 2523 - static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, 2524 - int colour_off, gfp_t local_flags, 2525 - int nodeid) 2605 + static void *alloc_slabmgmt(struct kmem_cache *cachep, 2606 + struct page *page, int colour_off, 2607 + gfp_t local_flags, int nodeid) 2526 2608 { 2527 - struct slab *slabp; 2609 + void *freelist; 2610 + void *addr = page_address(page); 2528 2611 2529 2612 if (OFF_SLAB(cachep)) { 2530 2613 /* Slab management obj is off-slab. 
*/ 2531 - slabp = kmem_cache_alloc_node(cachep->slabp_cache, 2614 + freelist = kmem_cache_alloc_node(cachep->freelist_cache, 2532 2615 local_flags, nodeid); 2533 - /* 2534 - * If the first object in the slab is leaked (it's allocated 2535 - * but no one has a reference to it), we want to make sure 2536 - * kmemleak does not treat the ->s_mem pointer as a reference 2537 - * to the object. Otherwise we will not report the leak. 2538 - */ 2539 - kmemleak_scan_area(&slabp->list, sizeof(struct list_head), 2540 - local_flags); 2541 - if (!slabp) 2616 + if (!freelist) 2542 2617 return NULL; 2543 2618 } else { 2544 - slabp = objp + colour_off; 2545 - colour_off += cachep->slab_size; 2619 + freelist = addr + colour_off; 2620 + colour_off += cachep->freelist_size; 2546 2621 } 2547 - slabp->inuse = 0; 2548 - slabp->colouroff = colour_off; 2549 - slabp->s_mem = objp + colour_off; 2550 - slabp->nodeid = nodeid; 2551 - slabp->free = 0; 2552 - return slabp; 2622 + page->active = 0; 2623 + page->s_mem = addr + colour_off; 2624 + return freelist; 2553 2625 } 2554 2626 2555 - static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) 2627 + static inline unsigned int *slab_freelist(struct page *page) 2556 2628 { 2557 - return (kmem_bufctl_t *) (slabp + 1); 2629 + return (unsigned int *)(page->freelist); 2558 2630 } 2559 2631 2560 2632 static void cache_init_objs(struct kmem_cache *cachep, 2561 - struct slab *slabp) 2633 + struct page *page) 2562 2634 { 2563 2635 int i; 2564 2636 2565 2637 for (i = 0; i < cachep->num; i++) { 2566 - void *objp = index_to_obj(cachep, slabp, i); 2638 + void *objp = index_to_obj(cachep, page, i); 2567 2639 #if DEBUG 2568 2640 /* need to poison the objs? 
*/ 2569 2641 if (cachep->flags & SLAB_POISON) ··· 2589 2681 if (cachep->ctor) 2590 2682 cachep->ctor(objp); 2591 2683 #endif 2592 - slab_bufctl(slabp)[i] = i + 1; 2684 + slab_freelist(page)[i] = i; 2593 2685 } 2594 - slab_bufctl(slabp)[i - 1] = BUFCTL_END; 2595 2686 } 2596 2687 2597 2688 static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) ··· 2603 2696 } 2604 2697 } 2605 2698 2606 - static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, 2699 + static void *slab_get_obj(struct kmem_cache *cachep, struct page *page, 2607 2700 int nodeid) 2608 2701 { 2609 - void *objp = index_to_obj(cachep, slabp, slabp->free); 2610 - kmem_bufctl_t next; 2702 + void *objp; 2611 2703 2612 - slabp->inuse++; 2613 - next = slab_bufctl(slabp)[slabp->free]; 2704 + objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]); 2705 + page->active++; 2614 2706 #if DEBUG 2615 - slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; 2616 - WARN_ON(slabp->nodeid != nodeid); 2707 + WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); 2617 2708 #endif 2618 - slabp->free = next; 2619 2709 2620 2710 return objp; 2621 2711 } 2622 2712 2623 - static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, 2713 + static void slab_put_obj(struct kmem_cache *cachep, struct page *page, 2624 2714 void *objp, int nodeid) 2625 2715 { 2626 - unsigned int objnr = obj_to_index(cachep, slabp, objp); 2627 - 2716 + unsigned int objnr = obj_to_index(cachep, page, objp); 2628 2717 #if DEBUG 2629 - /* Verify that the slab belongs to the intended node */ 2630 - WARN_ON(slabp->nodeid != nodeid); 2718 + unsigned int i; 2631 2719 2632 - if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { 2633 - printk(KERN_ERR "slab: double free detected in cache " 2634 - "'%s', objp %p\n", cachep->name, objp); 2635 - BUG(); 2720 + /* Verify that the slab belongs to the intended node */ 2721 + WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); 2722 + 2723 + /* Verify double free bug */ 2724 + 
for (i = page->active; i < cachep->num; i++) { 2725 + if (slab_freelist(page)[i] == objnr) { 2726 + printk(KERN_ERR "slab: double free detected in cache " 2727 + "'%s', objp %p\n", cachep->name, objp); 2728 + BUG(); 2729 + } 2636 2730 } 2637 2731 #endif 2638 - slab_bufctl(slabp)[objnr] = slabp->free; 2639 - slabp->free = objnr; 2640 - slabp->inuse--; 2732 + page->active--; 2733 + slab_freelist(page)[page->active] = objnr; 2641 2734 } 2642 2735 2643 2736 /* ··· 2645 2738 * for the slab allocator to be able to lookup the cache and slab of a 2646 2739 * virtual address for kfree, ksize, and slab debugging. 2647 2740 */ 2648 - static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, 2649 - void *addr) 2741 + static void slab_map_pages(struct kmem_cache *cache, struct page *page, 2742 + void *freelist) 2650 2743 { 2651 - int nr_pages; 2652 - struct page *page; 2653 - 2654 - page = virt_to_page(addr); 2655 - 2656 - nr_pages = 1; 2657 - if (likely(!PageCompound(page))) 2658 - nr_pages <<= cache->gfporder; 2659 - 2660 - do { 2661 - page->slab_cache = cache; 2662 - page->slab_page = slab; 2663 - page++; 2664 - } while (--nr_pages); 2744 + page->slab_cache = cache; 2745 + page->freelist = freelist; 2665 2746 } 2666 2747 2667 2748 /* ··· 2657 2762 * kmem_cache_alloc() when there are no active objs left in a cache. 2658 2763 */ 2659 2764 static int cache_grow(struct kmem_cache *cachep, 2660 - gfp_t flags, int nodeid, void *objp) 2765 + gfp_t flags, int nodeid, struct page *page) 2661 2766 { 2662 - struct slab *slabp; 2767 + void *freelist; 2663 2768 size_t offset; 2664 2769 gfp_t local_flags; 2665 2770 struct kmem_cache_node *n; ··· 2700 2805 * Get mem for the objs. Attempt to allocate a physical page from 2701 2806 * 'nodeid'. 
2702 2807 */ 2703 - if (!objp) 2704 - objp = kmem_getpages(cachep, local_flags, nodeid); 2705 - if (!objp) 2808 + if (!page) 2809 + page = kmem_getpages(cachep, local_flags, nodeid); 2810 + if (!page) 2706 2811 goto failed; 2707 2812 2708 2813 /* Get slab management. */ 2709 - slabp = alloc_slabmgmt(cachep, objp, offset, 2814 + freelist = alloc_slabmgmt(cachep, page, offset, 2710 2815 local_flags & ~GFP_CONSTRAINT_MASK, nodeid); 2711 - if (!slabp) 2816 + if (!freelist) 2712 2817 goto opps1; 2713 2818 2714 - slab_map_pages(cachep, slabp, objp); 2819 + slab_map_pages(cachep, page, freelist); 2715 2820 2716 - cache_init_objs(cachep, slabp); 2821 + cache_init_objs(cachep, page); 2717 2822 2718 2823 if (local_flags & __GFP_WAIT) 2719 2824 local_irq_disable(); ··· 2721 2826 spin_lock(&n->list_lock); 2722 2827 2723 2828 /* Make slab active. */ 2724 - list_add_tail(&slabp->list, &(n->slabs_free)); 2829 + list_add_tail(&page->lru, &(n->slabs_free)); 2725 2830 STATS_INC_GROWN(cachep); 2726 2831 n->free_objects += cachep->num; 2727 2832 spin_unlock(&n->list_lock); 2728 2833 return 1; 2729 2834 opps1: 2730 - kmem_freepages(cachep, objp); 2835 + kmem_freepages(cachep, page); 2731 2836 failed: 2732 2837 if (local_flags & __GFP_WAIT) 2733 2838 local_irq_disable(); ··· 2775 2880 static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, 2776 2881 unsigned long caller) 2777 2882 { 2778 - struct page *page; 2779 2883 unsigned int objnr; 2780 - struct slab *slabp; 2884 + struct page *page; 2781 2885 2782 2886 BUG_ON(virt_to_cache(objp) != cachep); 2783 2887 2784 2888 objp -= obj_offset(cachep); 2785 2889 kfree_debugcheck(objp); 2786 2890 page = virt_to_head_page(objp); 2787 - 2788 - slabp = page->slab_page; 2789 2891 2790 2892 if (cachep->flags & SLAB_RED_ZONE) { 2791 2893 verify_redzone_free(cachep, objp); ··· 2792 2900 if (cachep->flags & SLAB_STORE_USER) 2793 2901 *dbg_userword(cachep, objp) = (void *)caller; 2794 2902 2795 - objnr = obj_to_index(cachep, slabp, 
objp); 2903 + objnr = obj_to_index(cachep, page, objp); 2796 2904 2797 2905 BUG_ON(objnr >= cachep->num); 2798 - BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); 2906 + BUG_ON(objp != index_to_obj(cachep, page, objnr)); 2799 2907 2800 - #ifdef CONFIG_DEBUG_SLAB_LEAK 2801 - slab_bufctl(slabp)[objnr] = BUFCTL_FREE; 2802 - #endif 2803 2908 if (cachep->flags & SLAB_POISON) { 2804 2909 #ifdef CONFIG_DEBUG_PAGEALLOC 2805 2910 if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { ··· 2813 2924 return objp; 2814 2925 } 2815 2926 2816 - static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) 2817 - { 2818 - kmem_bufctl_t i; 2819 - int entries = 0; 2820 - 2821 - /* Check slab's freelist to see if this obj is there. */ 2822 - for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { 2823 - entries++; 2824 - if (entries > cachep->num || i >= cachep->num) 2825 - goto bad; 2826 - } 2827 - if (entries != cachep->num - slabp->inuse) { 2828 - bad: 2829 - printk(KERN_ERR "slab: Internal list corruption detected in " 2830 - "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n", 2831 - cachep->name, cachep->num, slabp, slabp->inuse, 2832 - print_tainted()); 2833 - print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp, 2834 - sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t), 2835 - 1); 2836 - BUG(); 2837 - } 2838 - } 2839 2927 #else 2840 2928 #define kfree_debugcheck(x) do { } while(0) 2841 2929 #define cache_free_debugcheck(x,objp,z) (objp) 2842 - #define check_slabp(x,y) do { } while(0) 2843 2930 #endif 2844 2931 2845 2932 static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, ··· 2854 2989 2855 2990 while (batchcount > 0) { 2856 2991 struct list_head *entry; 2857 - struct slab *slabp; 2992 + struct page *page; 2858 2993 /* Get slab alloc is to come from. 
*/ 2859 2994 entry = n->slabs_partial.next; 2860 2995 if (entry == &n->slabs_partial) { ··· 2864 2999 goto must_grow; 2865 3000 } 2866 3001 2867 - slabp = list_entry(entry, struct slab, list); 2868 - check_slabp(cachep, slabp); 3002 + page = list_entry(entry, struct page, lru); 2869 3003 check_spinlock_acquired(cachep); 2870 3004 2871 3005 /* ··· 2872 3008 * there must be at least one object available for 2873 3009 * allocation. 2874 3010 */ 2875 - BUG_ON(slabp->inuse >= cachep->num); 3011 + BUG_ON(page->active >= cachep->num); 2876 3012 2877 - while (slabp->inuse < cachep->num && batchcount--) { 3013 + while (page->active < cachep->num && batchcount--) { 2878 3014 STATS_INC_ALLOCED(cachep); 2879 3015 STATS_INC_ACTIVE(cachep); 2880 3016 STATS_SET_HIGH(cachep); 2881 3017 2882 - ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp, 3018 + ac_put_obj(cachep, ac, slab_get_obj(cachep, page, 2883 3019 node)); 2884 3020 } 2885 - check_slabp(cachep, slabp); 2886 3021 2887 3022 /* move slabp to correct slabp list: */ 2888 - list_del(&slabp->list); 2889 - if (slabp->free == BUFCTL_END) 2890 - list_add(&slabp->list, &n->slabs_full); 3023 + list_del(&page->lru); 3024 + if (page->active == cachep->num) 3025 + list_add(&page->list, &n->slabs_full); 2891 3026 else 2892 - list_add(&slabp->list, &n->slabs_partial); 3027 + list_add(&page->list, &n->slabs_partial); 2893 3028 } 2894 3029 2895 3030 must_grow: ··· 2960 3097 *dbg_redzone1(cachep, objp) = RED_ACTIVE; 2961 3098 *dbg_redzone2(cachep, objp) = RED_ACTIVE; 2962 3099 } 2963 - #ifdef CONFIG_DEBUG_SLAB_LEAK 2964 - { 2965 - struct slab *slabp; 2966 - unsigned objnr; 2967 - 2968 - slabp = virt_to_head_page(objp)->slab_page; 2969 - objnr = (unsigned)(objp - slabp->s_mem) / cachep->size; 2970 - slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; 2971 - } 2972 - #endif 2973 3100 objp += obj_offset(cachep); 2974 3101 if (cachep->ctor && cachep->flags & SLAB_POISON) 2975 3102 cachep->ctor(objp); ··· 3101 3248 * We may trigger various forms of 
reclaim on the allowed 3102 3249 * set and go into memory reserves if necessary. 3103 3250 */ 3251 + struct page *page; 3252 + 3104 3253 if (local_flags & __GFP_WAIT) 3105 3254 local_irq_enable(); 3106 3255 kmem_flagcheck(cache, flags); 3107 - obj = kmem_getpages(cache, local_flags, numa_mem_id()); 3256 + page = kmem_getpages(cache, local_flags, numa_mem_id()); 3108 3257 if (local_flags & __GFP_WAIT) 3109 3258 local_irq_disable(); 3110 - if (obj) { 3259 + if (page) { 3111 3260 /* 3112 3261 * Insert into the appropriate per node queues 3113 3262 */ 3114 - nid = page_to_nid(virt_to_page(obj)); 3115 - if (cache_grow(cache, flags, nid, obj)) { 3263 + nid = page_to_nid(page); 3264 + if (cache_grow(cache, flags, nid, page)) { 3116 3265 obj = ____cache_alloc_node(cache, 3117 3266 flags | GFP_THISNODE, nid); 3118 3267 if (!obj) ··· 3143 3288 int nodeid) 3144 3289 { 3145 3290 struct list_head *entry; 3146 - struct slab *slabp; 3291 + struct page *page; 3147 3292 struct kmem_cache_node *n; 3148 3293 void *obj; 3149 3294 int x; ··· 3163 3308 goto must_grow; 3164 3309 } 3165 3310 3166 - slabp = list_entry(entry, struct slab, list); 3311 + page = list_entry(entry, struct page, lru); 3167 3312 check_spinlock_acquired_node(cachep, nodeid); 3168 - check_slabp(cachep, slabp); 3169 3313 3170 3314 STATS_INC_NODEALLOCS(cachep); 3171 3315 STATS_INC_ACTIVE(cachep); 3172 3316 STATS_SET_HIGH(cachep); 3173 3317 3174 - BUG_ON(slabp->inuse == cachep->num); 3318 + BUG_ON(page->active == cachep->num); 3175 3319 3176 - obj = slab_get_obj(cachep, slabp, nodeid); 3177 - check_slabp(cachep, slabp); 3320 + obj = slab_get_obj(cachep, page, nodeid); 3178 3321 n->free_objects--; 3179 3322 /* move slabp to correct slabp list: */ 3180 - list_del(&slabp->list); 3323 + list_del(&page->lru); 3181 3324 3182 - if (slabp->free == BUFCTL_END) 3183 - list_add(&slabp->list, &n->slabs_full); 3325 + if (page->active == cachep->num) 3326 + list_add(&page->lru, &n->slabs_full); 3184 3327 else 3185 - 
list_add(&slabp->list, &n->slabs_partial); 3328 + list_add(&page->lru, &n->slabs_partial); 3186 3329 3187 3330 spin_unlock(&n->list_lock); 3188 3331 goto done; ··· 3330 3477 3331 3478 for (i = 0; i < nr_objects; i++) { 3332 3479 void *objp; 3333 - struct slab *slabp; 3480 + struct page *page; 3334 3481 3335 3482 clear_obj_pfmemalloc(&objpp[i]); 3336 3483 objp = objpp[i]; 3337 3484 3338 - slabp = virt_to_slab(objp); 3485 + page = virt_to_head_page(objp); 3339 3486 n = cachep->node[node]; 3340 - list_del(&slabp->list); 3487 + list_del(&page->lru); 3341 3488 check_spinlock_acquired_node(cachep, node); 3342 - check_slabp(cachep, slabp); 3343 - slab_put_obj(cachep, slabp, objp, node); 3489 + slab_put_obj(cachep, page, objp, node); 3344 3490 STATS_DEC_ACTIVE(cachep); 3345 3491 n->free_objects++; 3346 - check_slabp(cachep, slabp); 3347 3492 3348 3493 /* fixup slab chains */ 3349 - if (slabp->inuse == 0) { 3494 + if (page->active == 0) { 3350 3495 if (n->free_objects > n->free_limit) { 3351 3496 n->free_objects -= cachep->num; 3352 3497 /* No need to drop any previously held ··· 3353 3502 * a different cache, refer to comments before 3354 3503 * alloc_slabmgmt. 3355 3504 */ 3356 - slab_destroy(cachep, slabp); 3505 + slab_destroy(cachep, page); 3357 3506 } else { 3358 - list_add(&slabp->list, &n->slabs_free); 3507 + list_add(&page->lru, &n->slabs_free); 3359 3508 } 3360 3509 } else { 3361 3510 /* Unconditionally move a slab to the end of the 3362 3511 * partial list on free - maximum time for the 3363 3512 * other objects to be freed, too. 
3364 3513 */ 3365 - list_add_tail(&slabp->list, &n->slabs_partial); 3514 + list_add_tail(&page->lru, &n->slabs_partial); 3366 3515 } 3367 3516 } 3368 3517 } ··· 3402 3551 3403 3552 p = n->slabs_free.next; 3404 3553 while (p != &(n->slabs_free)) { 3405 - struct slab *slabp; 3554 + struct page *page; 3406 3555 3407 - slabp = list_entry(p, struct slab, list); 3408 - BUG_ON(slabp->inuse); 3556 + page = list_entry(p, struct page, lru); 3557 + BUG_ON(page->active); 3409 3558 3410 3559 i++; 3411 3560 p = p->next; ··· 4009 4158 #ifdef CONFIG_SLABINFO 4010 4159 void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) 4011 4160 { 4012 - struct slab *slabp; 4161 + struct page *page; 4013 4162 unsigned long active_objs; 4014 4163 unsigned long num_objs; 4015 4164 unsigned long active_slabs = 0; ··· 4029 4178 check_irq_on(); 4030 4179 spin_lock_irq(&n->list_lock); 4031 4180 4032 - list_for_each_entry(slabp, &n->slabs_full, list) { 4033 - if (slabp->inuse != cachep->num && !error) 4181 + list_for_each_entry(page, &n->slabs_full, lru) { 4182 + if (page->active != cachep->num && !error) 4034 4183 error = "slabs_full accounting error"; 4035 4184 active_objs += cachep->num; 4036 4185 active_slabs++; 4037 4186 } 4038 - list_for_each_entry(slabp, &n->slabs_partial, list) { 4039 - if (slabp->inuse == cachep->num && !error) 4040 - error = "slabs_partial inuse accounting error"; 4041 - if (!slabp->inuse && !error) 4042 - error = "slabs_partial/inuse accounting error"; 4043 - active_objs += slabp->inuse; 4187 + list_for_each_entry(page, &n->slabs_partial, lru) { 4188 + if (page->active == cachep->num && !error) 4189 + error = "slabs_partial accounting error"; 4190 + if (!page->active && !error) 4191 + error = "slabs_partial accounting error"; 4192 + active_objs += page->active; 4044 4193 active_slabs++; 4045 4194 } 4046 - list_for_each_entry(slabp, &n->slabs_free, list) { 4047 - if (slabp->inuse && !error) 4048 - error = "slabs_free/inuse accounting error"; 4195 + 
list_for_each_entry(page, &n->slabs_free, lru) { 4196 + if (page->active && !error) 4197 + error = "slabs_free accounting error"; 4049 4198 num_slabs++; 4050 4199 } 4051 4200 free_objects += n->free_objects; ··· 4197 4346 return 1; 4198 4347 } 4199 4348 4200 - static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) 4349 + static void handle_slab(unsigned long *n, struct kmem_cache *c, 4350 + struct page *page) 4201 4351 { 4202 4352 void *p; 4203 - int i; 4353 + int i, j; 4354 + 4204 4355 if (n[0] == n[1]) 4205 4356 return; 4206 - for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) { 4207 - if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) 4357 + for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { 4358 + bool active = true; 4359 + 4360 + for (j = page->active; j < c->num; j++) { 4361 + /* Skip freed item */ 4362 + if (slab_freelist(page)[j] == i) { 4363 + active = false; 4364 + break; 4365 + } 4366 + } 4367 + if (!active) 4208 4368 continue; 4369 + 4209 4370 if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) 4210 4371 return; 4211 4372 } ··· 4242 4379 static int leaks_show(struct seq_file *m, void *p) 4243 4380 { 4244 4381 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); 4245 - struct slab *slabp; 4382 + struct page *page; 4246 4383 struct kmem_cache_node *n; 4247 4384 const char *name; 4248 4385 unsigned long *x = m->private; ··· 4266 4403 check_irq_on(); 4267 4404 spin_lock_irq(&n->list_lock); 4268 4405 4269 - list_for_each_entry(slabp, &n->slabs_full, list) 4270 - handle_slab(x, cachep, slabp); 4271 - list_for_each_entry(slabp, &n->slabs_partial, list) 4272 - handle_slab(x, cachep, slabp); 4406 + list_for_each_entry(page, &n->slabs_full, lru) 4407 + handle_slab(x, cachep, page); 4408 + list_for_each_entry(page, &n->slabs_partial, lru) 4409 + handle_slab(x, cachep, page); 4273 4410 spin_unlock_irq(&n->list_lock); 4274 4411 } 4275 4412 name = cachep->name;
+36 -9
mm/slub.c
··· 155 155 /* 156 156 * Maximum number of desirable partial slabs. 157 157 * The existence of more partial slabs makes kmem_cache_shrink 158 - * sort the partial list by the number of objects in the. 158 + * sort the partial list by the number of objects in use. 159 159 */ 160 160 #define MAX_PARTIAL 10 161 161 ··· 933 933 * Hooks for other subsystems that check memory allocations. In a typical 934 934 * production configuration these hooks all should produce no code at all. 935 935 */ 936 + static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) 937 + { 938 + kmemleak_alloc(ptr, size, 1, flags); 939 + } 940 + 941 + static inline void kfree_hook(const void *x) 942 + { 943 + kmemleak_free(x); 944 + } 945 + 936 946 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) 937 947 { 938 948 flags &= gfp_allowed_mask; ··· 1227 1217 /* 1228 1218 * Enable debugging if selected on the kernel commandline. 1229 1219 */ 1230 - if (slub_debug && (!slub_debug_slabs || 1231 - !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))) 1220 + if (slub_debug && (!slub_debug_slabs || (name && 1221 + !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))) 1232 1222 flags |= slub_debug; 1233 1223 1234 1224 return flags; ··· 1270 1260 static inline void dec_slabs_node(struct kmem_cache *s, int node, 1271 1261 int objects) {} 1272 1262 1263 + static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) 1264 + { 1265 + kmemleak_alloc(ptr, size, 1, flags); 1266 + } 1267 + 1268 + static inline void kfree_hook(const void *x) 1269 + { 1270 + kmemleak_free(x); 1271 + } 1272 + 1273 1273 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) 1274 1274 { return 0; } 1275 1275 1276 1276 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, 1277 - void *object) {} 1277 + void *object) 1278 + { 1279 + kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, 1280 + flags & 
gfp_allowed_mask); 1281 + } 1278 1282 1279 - static inline void slab_free_hook(struct kmem_cache *s, void *x) {} 1283 + static inline void slab_free_hook(struct kmem_cache *s, void *x) 1284 + { 1285 + kmemleak_free_recursive(x, s->flags); 1286 + } 1280 1287 1281 1288 #endif /* CONFIG_SLUB_DEBUG */ 1282 1289 ··· 2856 2829 * slab on the node for this slabcache. There are no concurrent accesses 2857 2830 * possible. 2858 2831 * 2859 - * Note that this function only works on the kmalloc_node_cache 2860 - * when allocating for the kmalloc_node_cache. This is used for bootstrapping 2832 + * Note that this function only works on the kmem_cache_node 2833 + * when allocating for the kmem_cache_node. This is used for bootstrapping 2861 2834 * memory on a fresh node that has no slab structures yet. 2862 2835 */ 2863 2836 static void early_kmem_cache_node_alloc(int node) ··· 3299 3272 if (page) 3300 3273 ptr = page_address(page); 3301 3274 3302 - kmemleak_alloc(ptr, size, 1, flags); 3275 + kmalloc_large_node_hook(ptr, size, flags); 3303 3276 return ptr; 3304 3277 } 3305 3278 ··· 3363 3336 page = virt_to_head_page(x); 3364 3337 if (unlikely(!PageSlab(page))) { 3365 3338 BUG_ON(!PageCompound(page)); 3366 - kmemleak_free(x); 3339 + kfree_hook(x); 3367 3340 __free_memcg_kmem_pages(page, compound_order(page)); 3368 3341 return; 3369 3342 }