Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'memblock-v6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock

Pull mm-init update from Mike Rapoport:
"Simplify deferred initialization of struct pages

Refactor and simplify deferred initialization of the memory map.

Besides the negative diffstat, it gives a 3ms (55ms vs 58ms) reduction in
the initialization of deferred pages on a single-node system with 64GiB
of RAM"

* tag 'memblock-v6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock:
memblock: drop for_each_free_mem_pfn_range_in_zone_from()
mm/mm_init: drop deferred_init_maxorder()
mm/mm_init: deferred_init_memmap: use a job per zone
mm/mm_init: use deferred_init_memmap_chunk() in deferred_grow_zone()

+67 -221
-1
.clang-format
··· 294 294 - 'for_each_fib6_node_rt_rcu' 295 295 - 'for_each_fib6_walker_rt' 296 296 - 'for_each_file_lock' 297 - - 'for_each_free_mem_pfn_range_in_zone_from' 298 297 - 'for_each_free_mem_range' 299 298 - 'for_each_free_mem_range_reverse' 300 299 - 'for_each_func_rsrc'
-22
include/linux/memblock.h
··· 324 324 for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ 325 325 i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) 326 326 327 - #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 328 - void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, 329 - unsigned long *out_spfn, 330 - unsigned long *out_epfn); 331 - 332 - /** 333 - * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific 334 - * free memblock areas from a given point 335 - * @i: u64 used as loop variable 336 - * @zone: zone in which all of the memory blocks reside 337 - * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL 338 - * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL 339 - * 340 - * Walks over free (memory && !reserved) areas of memblock in a specific 341 - * zone, continuing from current position. Available as soon as memblock is 342 - * initialized. 343 - */ 344 - #define for_each_free_mem_pfn_range_in_zone_from(i, zone, p_start, p_end) \ 345 - for (; i != U64_MAX; \ 346 - __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) 347 - 348 - #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ 349 327 350 328 /** 351 329 * for_each_free_mem_range - iterate through free memblock areas
-64
mm/memblock.c
··· 1445 1445 return 0; 1446 1446 } 1447 1447 1448 - #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 1449 - /** 1450 - * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone() 1451 - * 1452 - * @idx: pointer to u64 loop variable 1453 - * @zone: zone in which all of the memory blocks reside 1454 - * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL 1455 - * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL 1456 - * 1457 - * This function is meant to be a zone/pfn specific wrapper for the 1458 - * for_each_mem_range type iterators. Specifically they are used in the 1459 - * deferred memory init routines and as such we were duplicating much of 1460 - * this logic throughout the code. So instead of having it in multiple 1461 - * locations it seemed like it would make more sense to centralize this to 1462 - * one new iterator that does everything they need. 1463 - */ 1464 - void __init_memblock 1465 - __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, 1466 - unsigned long *out_spfn, unsigned long *out_epfn) 1467 - { 1468 - int zone_nid = zone_to_nid(zone); 1469 - phys_addr_t spa, epa; 1470 - 1471 - __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, 1472 - &memblock.memory, &memblock.reserved, 1473 - &spa, &epa, NULL); 1474 - 1475 - while (*idx != U64_MAX) { 1476 - unsigned long epfn = PFN_DOWN(epa); 1477 - unsigned long spfn = PFN_UP(spa); 1478 - 1479 - /* 1480 - * Verify the end is at least past the start of the zone and 1481 - * that we have at least one PFN to initialize. 
1482 - */ 1483 - if (zone->zone_start_pfn < epfn && spfn < epfn) { 1484 - /* if we went too far just stop searching */ 1485 - if (zone_end_pfn(zone) <= spfn) { 1486 - *idx = U64_MAX; 1487 - break; 1488 - } 1489 - 1490 - if (out_spfn) 1491 - *out_spfn = max(zone->zone_start_pfn, spfn); 1492 - if (out_epfn) 1493 - *out_epfn = min(zone_end_pfn(zone), epfn); 1494 - 1495 - return; 1496 - } 1497 - 1498 - __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, 1499 - &memblock.memory, &memblock.reserved, 1500 - &spa, &epa, NULL); 1501 - } 1502 - 1503 - /* signal end of iteration */ 1504 - if (out_spfn) 1505 - *out_spfn = ULONG_MAX; 1506 - if (out_epfn) 1507 - *out_epfn = 0; 1508 - } 1509 - 1510 - #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ 1511 - 1512 1448 /** 1513 1449 * memblock_alloc_range_nid - allocate boot memory block 1514 1450 * @size: size of memory block to be allocated in bytes
+67 -134
mm/mm_init.c
··· 2045 2045 } 2046 2046 2047 2047 /* 2048 - * This function is meant to pre-load the iterator for the zone init from 2049 - * a given point. 2050 - * Specifically it walks through the ranges starting with initial index 2051 - * passed to it until we are caught up to the first_init_pfn value and 2052 - * exits there. If we never encounter the value we return false indicating 2053 - * there are no valid ranges left. 2054 - */ 2055 - static bool __init 2056 - deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone, 2057 - unsigned long *spfn, unsigned long *epfn, 2058 - unsigned long first_init_pfn) 2059 - { 2060 - u64 j = *i; 2061 - 2062 - if (j == 0) 2063 - __next_mem_pfn_range_in_zone(&j, zone, spfn, epfn); 2064 - 2065 - /* 2066 - * Start out by walking through the ranges in this zone that have 2067 - * already been initialized. We don't need to do anything with them 2068 - * so we just need to flush them out of the system. 2069 - */ 2070 - for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) { 2071 - if (*epfn <= first_init_pfn) 2072 - continue; 2073 - if (*spfn < first_init_pfn) 2074 - *spfn = first_init_pfn; 2075 - *i = j; 2076 - return true; 2077 - } 2078 - 2079 - return false; 2080 - } 2081 - 2082 - /* 2083 - * Initialize and free pages. We do it in two loops: first we initialize 2084 - * struct page, then free to buddy allocator, because while we are 2085 - * freeing pages we can access pages that are ahead (computing buddy 2086 - * page in __free_one_page()). 2048 + * Initialize and free pages. 2087 2049 * 2088 - * In order to try and keep some memory in the cache we have the loop 2089 - * broken along max page order boundaries. This way we will not cause 2090 - * any issues with the buddy page computation. 2050 + * At this point reserved pages and struct pages that correspond to holes in 2051 + * memblock.memory are already intialized so every free range has a valid 2052 + * memory map around it. 
2053 + * This ensures that access of pages that are ahead of the range being 2054 + * initialized (computing buddy page in __free_one_page()) always reads a valid 2055 + * struct page. 2056 + * 2057 + * In order to try and improve CPU cache locality we have the loop broken along 2058 + * max page order boundaries. 2091 2059 */ 2092 2060 static unsigned long __init 2093 - deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn, 2094 - unsigned long *end_pfn) 2061 + deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn, 2062 + struct zone *zone) 2095 2063 { 2096 - unsigned long mo_pfn = ALIGN(*start_pfn + 1, MAX_ORDER_NR_PAGES); 2097 - unsigned long spfn = *start_pfn, epfn = *end_pfn; 2064 + int nid = zone_to_nid(zone); 2098 2065 unsigned long nr_pages = 0; 2099 - u64 j = *i; 2066 + phys_addr_t start, end; 2067 + u64 i = 0; 2100 2068 2101 - /* First we loop through and initialize the page values */ 2102 - for_each_free_mem_pfn_range_in_zone_from(j, zone, start_pfn, end_pfn) { 2103 - unsigned long t; 2069 + for_each_free_mem_range(i, nid, 0, &start, &end, NULL) { 2070 + unsigned long spfn = PFN_UP(start); 2071 + unsigned long epfn = PFN_DOWN(end); 2104 2072 2105 - if (mo_pfn <= *start_pfn) 2073 + if (spfn >= end_pfn) 2106 2074 break; 2107 2075 2108 - t = min(mo_pfn, *end_pfn); 2109 - nr_pages += deferred_init_pages(zone, *start_pfn, t); 2076 + spfn = max(spfn, start_pfn); 2077 + epfn = min(epfn, end_pfn); 2110 2078 2111 - if (mo_pfn < *end_pfn) { 2112 - *start_pfn = mo_pfn; 2113 - break; 2079 + while (spfn < epfn) { 2080 + unsigned long mo_pfn = ALIGN(spfn + 1, MAX_ORDER_NR_PAGES); 2081 + unsigned long chunk_end = min(mo_pfn, epfn); 2082 + 2083 + nr_pages += deferred_init_pages(zone, spfn, chunk_end); 2084 + deferred_free_pages(spfn, chunk_end - spfn); 2085 + 2086 + spfn = chunk_end; 2087 + 2088 + if (irqs_disabled()) 2089 + touch_nmi_watchdog(); 2090 + else 2091 + cond_resched(); 2114 2092 } 2115 - } 2116 - 2117 - /* Reset 
values and now loop through freeing pages as needed */ 2118 - swap(j, *i); 2119 - 2120 - for_each_free_mem_pfn_range_in_zone_from(j, zone, &spfn, &epfn) { 2121 - unsigned long t; 2122 - 2123 - if (mo_pfn <= spfn) 2124 - break; 2125 - 2126 - t = min(mo_pfn, epfn); 2127 - deferred_free_pages(spfn, t - spfn); 2128 - 2129 - if (mo_pfn <= epfn) 2130 - break; 2131 2093 } 2132 2094 2133 2095 return nr_pages; 2134 2096 } 2135 2097 2136 2098 static void __init 2137 - deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn, 2138 - void *arg) 2099 + deferred_init_memmap_job(unsigned long start_pfn, unsigned long end_pfn, 2100 + void *arg) 2139 2101 { 2140 - unsigned long spfn, epfn; 2141 2102 struct zone *zone = arg; 2142 - u64 i = 0; 2143 2103 2144 - deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn); 2145 - 2146 - /* 2147 - * Initialize and free pages in MAX_PAGE_ORDER sized increments so that 2148 - * we can avoid introducing any issues with the buddy allocator. 
2149 - */ 2150 - while (spfn < end_pfn) { 2151 - deferred_init_maxorder(&i, zone, &spfn, &epfn); 2152 - cond_resched(); 2153 - } 2104 + deferred_init_memmap_chunk(start_pfn, end_pfn, zone); 2154 2105 } 2155 2106 2156 2107 static unsigned int __init ··· 2115 2164 { 2116 2165 pg_data_t *pgdat = data; 2117 2166 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); 2118 - unsigned long spfn = 0, epfn = 0; 2119 - unsigned long first_init_pfn, flags; 2167 + int max_threads = deferred_page_init_max_threads(cpumask); 2168 + unsigned long first_init_pfn, last_pfn, flags; 2120 2169 unsigned long start = jiffies; 2121 2170 struct zone *zone; 2122 - int max_threads; 2123 - u64 i = 0; 2124 2171 2125 2172 /* Bind memory initialisation thread to a local node if possible */ 2126 2173 if (!cpumask_empty(cpumask)) ··· 2146 2197 2147 2198 /* Only the highest zone is deferred */ 2148 2199 zone = pgdat->node_zones + pgdat->nr_zones - 1; 2200 + last_pfn = SECTION_ALIGN_UP(zone_end_pfn(zone)); 2149 2201 2150 - max_threads = deferred_page_init_max_threads(cpumask); 2202 + struct padata_mt_job job = { 2203 + .thread_fn = deferred_init_memmap_job, 2204 + .fn_arg = zone, 2205 + .start = first_init_pfn, 2206 + .size = last_pfn - first_init_pfn, 2207 + .align = PAGES_PER_SECTION, 2208 + .min_chunk = PAGES_PER_SECTION, 2209 + .max_threads = max_threads, 2210 + .numa_aware = false, 2211 + }; 2151 2212 2152 - while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) { 2153 - first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION); 2154 - struct padata_mt_job job = { 2155 - .thread_fn = deferred_init_memmap_chunk, 2156 - .fn_arg = zone, 2157 - .start = spfn, 2158 - .size = first_init_pfn - spfn, 2159 - .align = PAGES_PER_SECTION, 2160 - .min_chunk = PAGES_PER_SECTION, 2161 - .max_threads = max_threads, 2162 - .numa_aware = false, 2163 - }; 2164 - 2165 - padata_do_multithreaded(&job); 2166 - } 2213 + padata_do_multithreaded(&job); 2167 2214 2168 2215 /* Sanity check 
that the next zone really is unpopulated */ 2169 2216 WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone)); ··· 2184 2239 */ 2185 2240 bool __init deferred_grow_zone(struct zone *zone, unsigned int order) 2186 2241 { 2187 - unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION); 2242 + unsigned long nr_pages_needed = SECTION_ALIGN_UP(1 << order); 2188 2243 pg_data_t *pgdat = zone->zone_pgdat; 2189 2244 unsigned long first_deferred_pfn = pgdat->first_deferred_pfn; 2190 2245 unsigned long spfn, epfn, flags; 2191 2246 unsigned long nr_pages = 0; 2192 - u64 i = 0; 2193 2247 2194 2248 /* Only the last zone may have deferred pages */ 2195 2249 if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat)) ··· 2205 2261 return true; 2206 2262 } 2207 2263 2208 - /* If the zone is empty somebody else may have cleared out the zone */ 2209 - if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, 2210 - first_deferred_pfn)) { 2211 - pgdat->first_deferred_pfn = ULONG_MAX; 2212 - pgdat_resize_unlock(pgdat, &flags); 2213 - /* Retry only once. */ 2214 - return first_deferred_pfn != ULONG_MAX; 2264 + /* 2265 + * Initialize at least nr_pages_needed in section chunks. 2266 + * If a section has less free memory than nr_pages_needed, the next 2267 + * section will be also initialized. 2268 + * Note, that it still does not guarantee that allocation of order can 2269 + * be satisfied if the sections are fragmented because of memblock 2270 + * allocations. 2271 + */ 2272 + for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1); 2273 + nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone); 2274 + spfn = epfn, epfn += PAGES_PER_SECTION) { 2275 + nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone); 2215 2276 } 2216 2277 2217 2278 /* 2218 - * Initialize and free pages in MAX_PAGE_ORDER sized increments so 2219 - * that we can avoid introducing any issues with the buddy 2220 - * allocator. 
2279 + * There were no pages to initialize and free which means the zone's 2280 + * memory map is completely initialized. 2221 2281 */ 2222 - while (spfn < epfn) { 2223 - /* update our first deferred PFN for this section */ 2224 - first_deferred_pfn = spfn; 2282 + pgdat->first_deferred_pfn = nr_pages ? spfn : ULONG_MAX; 2225 2283 2226 - nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn); 2227 - touch_nmi_watchdog(); 2228 - 2229 - /* We should only stop along section boundaries */ 2230 - if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION) 2231 - continue; 2232 - 2233 - /* If our quota has been met we can stop here */ 2234 - if (nr_pages >= nr_pages_needed) 2235 - break; 2236 - } 2237 - 2238 - pgdat->first_deferred_pfn = spfn; 2239 2284 pgdat_resize_unlock(pgdat, &flags); 2240 2285 2241 2286 return nr_pages > 0;