Merge tag 'memblock-v6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock

-1

.clang-format

··· 294 294 - 'for_each_fib6_node_rt_rcu' 295 295 - 'for_each_fib6_walker_rt' 296 296 - 'for_each_file_lock' 297 - - 'for_each_free_mem_pfn_range_in_zone_from' 298 297 - 'for_each_free_mem_range' 299 298 - 'for_each_free_mem_range_reverse' 300 299 - 'for_each_func_rsrc'

-22

include/linux/memblock.h

··· 324 324 for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ 325 325 i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) 326 326 327 - #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 328 - void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, 329 - unsigned long *out_spfn, 330 - unsigned long *out_epfn); 331 - 332 - /** 333 - * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific 334 - * free memblock areas from a given point 335 - * @i: u64 used as loop variable 336 - * @zone: zone in which all of the memory blocks reside 337 - * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL 338 - * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL 339 - * 340 - * Walks over free (memory && !reserved) areas of memblock in a specific 341 - * zone, continuing from current position. Available as soon as memblock is 342 - * initialized. 343 - */ 344 - #define for_each_free_mem_pfn_range_in_zone_from(i, zone, p_start, p_end) \ 345 - for (; i != U64_MAX; \ 346 - __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) 347 - 348 - #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ 349 327 350 328 /** 351 329 * for_each_free_mem_range - iterate through free memblock areas

-64

mm/memblock.c

··· 1445 1445 return 0; 1446 1446 } 1447 1447 1448 - #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 1449 - /** 1450 - * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone() 1451 - * 1452 - * @idx: pointer to u64 loop variable 1453 - * @zone: zone in which all of the memory blocks reside 1454 - * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL 1455 - * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL 1456 - * 1457 - * This function is meant to be a zone/pfn specific wrapper for the 1458 - * for_each_mem_range type iterators. Specifically they are used in the 1459 - * deferred memory init routines and as such we were duplicating much of 1460 - * this logic throughout the code. So instead of having it in multiple 1461 - * locations it seemed like it would make more sense to centralize this to 1462 - * one new iterator that does everything they need. 1463 - */ 1464 - void __init_memblock 1465 - __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, 1466 - unsigned long *out_spfn, unsigned long *out_epfn) 1467 - { 1468 - int zone_nid = zone_to_nid(zone); 1469 - phys_addr_t spa, epa; 1470 - 1471 - __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, 1472 - &memblock.memory, &memblock.reserved, 1473 - &spa, &epa, NULL); 1474 - 1475 - while (*idx != U64_MAX) { 1476 - unsigned long epfn = PFN_DOWN(epa); 1477 - unsigned long spfn = PFN_UP(spa); 1478 - 1479 - /* 1480 - * Verify the end is at least past the start of the zone and 1481 - * that we have at least one PFN to initialize. 
1482 - */ 1483 - if (zone->zone_start_pfn < epfn && spfn < epfn) { 1484 - /* if we went too far just stop searching */ 1485 - if (zone_end_pfn(zone) <= spfn) { 1486 - *idx = U64_MAX; 1487 - break; 1488 - } 1489 - 1490 - if (out_spfn) 1491 - *out_spfn = max(zone->zone_start_pfn, spfn); 1492 - if (out_epfn) 1493 - *out_epfn = min(zone_end_pfn(zone), epfn); 1494 - 1495 - return; 1496 - } 1497 - 1498 - __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, 1499 - &memblock.memory, &memblock.reserved, 1500 - &spa, &epa, NULL); 1501 - } 1502 - 1503 - /* signal end of iteration */ 1504 - if (out_spfn) 1505 - *out_spfn = ULONG_MAX; 1506 - if (out_epfn) 1507 - *out_epfn = 0; 1508 - } 1509 - 1510 - #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ 1511 - 1512 1448 /** 1513 1449 * memblock_alloc_range_nid - allocate boot memory block 1514 1450 * @size: size of memory block to be allocated in bytes

+67 -134

mm/mm_init.c

··· 2045 2045 } 2046 2046 2047 2047 /* 2048 - * This function is meant to pre-load the iterator for the zone init from 2049 - * a given point. 2050 - * Specifically it walks through the ranges starting with initial index 2051 - * passed to it until we are caught up to the first_init_pfn value and 2052 - * exits there. If we never encounter the value we return false indicating 2053 - * there are no valid ranges left. 2054 - */ 2055 - static bool __init 2056 - deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone, 2057 - unsigned long *spfn, unsigned long *epfn, 2058 - unsigned long first_init_pfn) 2059 - { 2060 - u64 j = *i; 2061 - 2062 - if (j == 0) 2063 - __next_mem_pfn_range_in_zone(&j, zone, spfn, epfn); 2064 - 2065 - /* 2066 - * Start out by walking through the ranges in this zone that have 2067 - * already been initialized. We don't need to do anything with them 2068 - * so we just need to flush them out of the system. 2069 - */ 2070 - for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) { 2071 - if (*epfn <= first_init_pfn) 2072 - continue; 2073 - if (*spfn < first_init_pfn) 2074 - *spfn = first_init_pfn; 2075 - *i = j; 2076 - return true; 2077 - } 2078 - 2079 - return false; 2080 - } 2081 - 2082 - /* 2083 - * Initialize and free pages. We do it in two loops: first we initialize 2084 - * struct page, then free to buddy allocator, because while we are 2085 - * freeing pages we can access pages that are ahead (computing buddy 2086 - * page in __free_one_page()). 2048 + * Initialize and free pages. 2087 2049 * 2088 - * In order to try and keep some memory in the cache we have the loop 2089 - * broken along max page order boundaries. This way we will not cause 2090 - * any issues with the buddy page computation. 2050 + * At this point reserved pages and struct pages that correspond to holes in 2051 + * memblock.memory are already initialized so every free range has a valid 2052 + * memory map around it. 
2053 + * This ensures that access of pages that are ahead of the range being 2054 + * initialized (computing buddy page in __free_one_page()) always reads a valid 2055 + * struct page. 2056 + * 2057 + * In order to try and improve CPU cache locality we have the loop broken along 2058 + * max page order boundaries. 2091 2059 */ 2092 2060 static unsigned long __init 2093 - deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn, 2094 - unsigned long *end_pfn) 2061 + deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn, 2062 + struct zone *zone) 2095 2063 { 2096 - unsigned long mo_pfn = ALIGN(*start_pfn + 1, MAX_ORDER_NR_PAGES); 2097 - unsigned long spfn = *start_pfn, epfn = *end_pfn; 2064 + int nid = zone_to_nid(zone); 2098 2065 unsigned long nr_pages = 0; 2099 - u64 j = *i; 2066 + phys_addr_t start, end; 2067 + u64 i = 0; 2100 2068 2101 - /* First we loop through and initialize the page values */ 2102 - for_each_free_mem_pfn_range_in_zone_from(j, zone, start_pfn, end_pfn) { 2103 - unsigned long t; 2069 + for_each_free_mem_range(i, nid, 0, &start, &end, NULL) { 2070 + unsigned long spfn = PFN_UP(start); 2071 + unsigned long epfn = PFN_DOWN(end); 2104 2072 2105 - if (mo_pfn <= *start_pfn) 2073 + if (spfn >= end_pfn) 2106 2074 break; 2107 2075 2108 - t = min(mo_pfn, *end_pfn); 2109 - nr_pages += deferred_init_pages(zone, *start_pfn, t); 2076 + spfn = max(spfn, start_pfn); 2077 + epfn = min(epfn, end_pfn); 2110 2078 2111 - if (mo_pfn < *end_pfn) { 2112 - *start_pfn = mo_pfn; 2113 - break; 2079 + while (spfn < epfn) { 2080 + unsigned long mo_pfn = ALIGN(spfn + 1, MAX_ORDER_NR_PAGES); 2081 + unsigned long chunk_end = min(mo_pfn, epfn); 2082 + 2083 + nr_pages += deferred_init_pages(zone, spfn, chunk_end); 2084 + deferred_free_pages(spfn, chunk_end - spfn); 2085 + 2086 + spfn = chunk_end; 2087 + 2088 + if (irqs_disabled()) 2089 + touch_nmi_watchdog(); 2090 + else 2091 + cond_resched(); 2114 2092 } 2115 - } 2116 - 2117 - /* Reset 
values and now loop through freeing pages as needed */ 2118 - swap(j, *i); 2119 - 2120 - for_each_free_mem_pfn_range_in_zone_from(j, zone, &spfn, &epfn) { 2121 - unsigned long t; 2122 - 2123 - if (mo_pfn <= spfn) 2124 - break; 2125 - 2126 - t = min(mo_pfn, epfn); 2127 - deferred_free_pages(spfn, t - spfn); 2128 - 2129 - if (mo_pfn <= epfn) 2130 - break; 2131 2093 } 2132 2094 2133 2095 return nr_pages; 2134 2096 } 2135 2097 2136 2098 static void __init 2137 - deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn, 2138 - void *arg) 2099 + deferred_init_memmap_job(unsigned long start_pfn, unsigned long end_pfn, 2100 + void *arg) 2139 2101 { 2140 - unsigned long spfn, epfn; 2141 2102 struct zone *zone = arg; 2142 - u64 i = 0; 2143 2103 2144 - deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn); 2145 - 2146 - /* 2147 - * Initialize and free pages in MAX_PAGE_ORDER sized increments so that 2148 - * we can avoid introducing any issues with the buddy allocator. 
2149 - */ 2150 - while (spfn < end_pfn) { 2151 - deferred_init_maxorder(&i, zone, &spfn, &epfn); 2152 - cond_resched(); 2153 - } 2104 + deferred_init_memmap_chunk(start_pfn, end_pfn, zone); 2154 2105 } 2155 2106 2156 2107 static unsigned int __init ··· 2115 2164 { 2116 2165 pg_data_t *pgdat = data; 2117 2166 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); 2118 - unsigned long spfn = 0, epfn = 0; 2119 - unsigned long first_init_pfn, flags; 2167 + int max_threads = deferred_page_init_max_threads(cpumask); 2168 + unsigned long first_init_pfn, last_pfn, flags; 2120 2169 unsigned long start = jiffies; 2121 2170 struct zone *zone; 2122 - int max_threads; 2123 - u64 i = 0; 2124 2171 2125 2172 /* Bind memory initialisation thread to a local node if possible */ 2126 2173 if (!cpumask_empty(cpumask)) ··· 2146 2197 2147 2198 /* Only the highest zone is deferred */ 2148 2199 zone = pgdat->node_zones + pgdat->nr_zones - 1; 2200 + last_pfn = SECTION_ALIGN_UP(zone_end_pfn(zone)); 2149 2201 2150 - max_threads = deferred_page_init_max_threads(cpumask); 2202 + struct padata_mt_job job = { 2203 + .thread_fn = deferred_init_memmap_job, 2204 + .fn_arg = zone, 2205 + .start = first_init_pfn, 2206 + .size = last_pfn - first_init_pfn, 2207 + .align = PAGES_PER_SECTION, 2208 + .min_chunk = PAGES_PER_SECTION, 2209 + .max_threads = max_threads, 2210 + .numa_aware = false, 2211 + }; 2151 2212 2152 - while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) { 2153 - first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION); 2154 - struct padata_mt_job job = { 2155 - .thread_fn = deferred_init_memmap_chunk, 2156 - .fn_arg = zone, 2157 - .start = spfn, 2158 - .size = first_init_pfn - spfn, 2159 - .align = PAGES_PER_SECTION, 2160 - .min_chunk = PAGES_PER_SECTION, 2161 - .max_threads = max_threads, 2162 - .numa_aware = false, 2163 - }; 2164 - 2165 - padata_do_multithreaded(&job); 2166 - } 2213 + padata_do_multithreaded(&job); 2167 2214 2168 2215 /* Sanity check 
that the next zone really is unpopulated */ 2169 2216 WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone)); ··· 2184 2239 */ 2185 2240 bool __init deferred_grow_zone(struct zone *zone, unsigned int order) 2186 2241 { 2187 - unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION); 2242 + unsigned long nr_pages_needed = SECTION_ALIGN_UP(1 << order); 2188 2243 pg_data_t *pgdat = zone->zone_pgdat; 2189 2244 unsigned long first_deferred_pfn = pgdat->first_deferred_pfn; 2190 2245 unsigned long spfn, epfn, flags; 2191 2246 unsigned long nr_pages = 0; 2192 - u64 i = 0; 2193 2247 2194 2248 /* Only the last zone may have deferred pages */ 2195 2249 if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat)) ··· 2205 2261 return true; 2206 2262 } 2207 2263 2208 - /* If the zone is empty somebody else may have cleared out the zone */ 2209 - if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, 2210 - first_deferred_pfn)) { 2211 - pgdat->first_deferred_pfn = ULONG_MAX; 2212 - pgdat_resize_unlock(pgdat, &flags); 2213 - /* Retry only once. */ 2214 - return first_deferred_pfn != ULONG_MAX; 2264 + /* 2265 + * Initialize at least nr_pages_needed in section chunks. 2266 + * If a section has less free memory than nr_pages_needed, the next 2267 + * section will be also initialized. 2268 + * Note, that it still does not guarantee that allocation of order can 2269 + * be satisfied if the sections are fragmented because of memblock 2270 + * allocations. 2271 + */ 2272 + for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1); 2273 + nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone); 2274 + spfn = epfn, epfn += PAGES_PER_SECTION) { 2275 + nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone); 2215 2276 } 2216 2277 2217 2278 /* 2218 - * Initialize and free pages in MAX_PAGE_ORDER sized increments so 2219 - * that we can avoid introducing any issues with the buddy 2220 - * allocator. 
2279 + * There were no pages to initialize and free which means the zone's 2280 + * memory map is completely initialized. 2221 2281 */ 2222 - while (spfn < epfn) { 2223 - /* update our first deferred PFN for this section */ 2224 - first_deferred_pfn = spfn; 2282 + pgdat->first_deferred_pfn = nr_pages ? spfn : ULONG_MAX; 2225 2283 2226 - nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn); 2227 - touch_nmi_watchdog(); 2228 - 2229 - /* We should only stop along section boundaries */ 2230 - if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION) 2231 - continue; 2232 - 2233 - /* If our quota has been met we can stop here */ 2234 - if (nr_pages >= nr_pages_needed) 2235 - break; 2236 - } 2237 - 2238 - pgdat->first_deferred_pfn = spfn; 2239 2284 pgdat_resize_unlock(pgdat, &flags); 2240 2285 2241 2286 return nr_pages > 0;

Configure Feed

Configure Feed