Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

aio: clean up and fix aio_setup_ring page mapping

Since commit 36bc08cc01709 ("fs/aio: Add support to aio ring pages
migration") the aio ring setup code has used a special per-ring backing
inode for the page allocations, rather than just using random anonymous
pages.

However, rather than remembering the pages as it allocated them, it
would allocate the pages, insert them into the file mapping (dirty, so
that they couldn't be freed), and then forget about them. And then to
look them up again, it would mmap the mapping, and then use
"get_user_pages()" to get back an array of the pages we just created.

Now, not only is that incredibly inefficient, it also leaked all the
pages if the mmap failed (which could happen due to excessive number of
mappings, for example).

So clean it all up, making it much more straightforward. Also remove
some leftovers of the previous (broken) mm_populate() usage that was
removed in commit d6c355c7dabc ("aio: fix race in ring buffer page
lookup introduced by page migration support") but left the pointless and
now misleading MAP_POPULATE flag around.

Tested-and-acked-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

+24 -36
+24 -36
fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -326,7 +326,7 @@
 	struct aio_ring *ring;
 	unsigned nr_events = ctx->max_reqs;
 	struct mm_struct *mm = current->mm;
-	unsigned long size, populate;
+	unsigned long size, unused;
 	int nr_pages;
 	int i;
 	struct file *file;
@@ -347,18 +347,6 @@
 		return -EAGAIN;
 	}
 
-	for (i = 0; i < nr_pages; i++) {
-		struct page *page;
-		page = find_or_create_page(file->f_inode->i_mapping,
-					   i, GFP_HIGHUSER | __GFP_ZERO);
-		if (!page)
-			break;
-		pr_debug("pid(%d) page[%d]->count=%d\n",
-			 current->pid, i, page_count(page));
-		SetPageUptodate(page);
-		SetPageDirty(page);
-		unlock_page(page);
-	}
 	ctx->aio_ring_file = file;
 	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
 		    / sizeof(struct io_event);
@@ -361,42 +373,41 @@
 		}
 	}
 
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page;
+		page = find_or_create_page(file->f_inode->i_mapping,
+					   i, GFP_HIGHUSER | __GFP_ZERO);
+		if (!page)
+			break;
+		pr_debug("pid(%d) page[%d]->count=%d\n",
+			 current->pid, i, page_count(page));
+		SetPageUptodate(page);
+		SetPageDirty(page);
+		unlock_page(page);
+
+		ctx->ring_pages[i] = page;
+	}
+	ctx->nr_pages = i;
+
+	if (unlikely(i != nr_pages)) {
+		aio_free_ring(ctx);
+		return -EAGAIN;
+	}
+
 	ctx->mmap_size = nr_pages * PAGE_SIZE;
 	pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);
 
 	down_write(&mm->mmap_sem);
 	ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
 				       PROT_READ | PROT_WRITE,
-				       MAP_SHARED | MAP_POPULATE, 0, &populate);
+				       MAP_SHARED, 0, &unused);
+	up_write(&mm->mmap_sem);
 	if (IS_ERR((void *)ctx->mmap_base)) {
-		up_write(&mm->mmap_sem);
 		ctx->mmap_size = 0;
 		aio_free_ring(ctx);
 		return -EAGAIN;
 	}
 
 	pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
-
-	/* We must do this while still holding mmap_sem for write, as we
-	 * need to be protected against userspace attempting to mremap()
-	 * or munmap() the ring buffer.
-	 */
-	ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
-				       1, 0, ctx->ring_pages, NULL);
-
-	/* Dropping the reference here is safe as the page cache will hold
-	 * onto the pages for us. It is also required so that page migration
-	 * can unmap the pages and get the right reference count.
-	 */
-	for (i = 0; i < ctx->nr_pages; i++)
-		put_page(ctx->ring_pages[i]);
-
-	up_write(&mm->mmap_sem);
-
-	if (unlikely(ctx->nr_pages != nr_pages)) {
-		aio_free_ring(ctx);
-		return -EAGAIN;
-	}
 
 	ctx->user_id = ctx->mmap_base;
 	ctx->nr_events = nr_events;	/* trusted copy */