Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring: move mapping/allocation helpers to a separate file

Move the mapping and allocation helpers from io_uring.c into a new file,
memmap.c, and declare them in memmap.h. There are no functional changes in
this patch; it is just a cleanup now that the full transition is done.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

+367 -335
+2 -1
io_uring/Makefile
··· 8 8 xattr.o nop.o fs.o splice.o sync.o \ 9 9 msg_ring.o advise.o openclose.o \ 10 10 epoll.o statx.o timeout.o fdinfo.o \ 11 - cancel.o waitid.o register.o truncate.o 11 + cancel.o waitid.o register.o \ 12 + truncate.o memmap.o 12 13 obj-$(CONFIG_IO_WQ) += io-wq.o 13 14 obj-$(CONFIG_FUTEX) += futex.o 14 15 obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
+2 -325
io_uring/io_uring.c
··· 95 95 #include "futex.h" 96 96 #include "napi.h" 97 97 #include "uring_cmd.h" 98 + #include "memmap.h" 98 99 99 100 #include "timeout.h" 100 101 #include "poll.h" ··· 2598 2597 return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; 2599 2598 } 2600 2599 2601 - void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages, 2602 - bool put_pages) 2603 - { 2604 - bool do_vunmap = false; 2605 - 2606 - if (!ptr) 2607 - return; 2608 - 2609 - if (put_pages && *npages) { 2610 - struct page **to_free = *pages; 2611 - int i; 2612 - 2613 - /* 2614 - * Only did vmap for the non-compound multiple page case. 2615 - * For the compound page, we just need to put the head. 2616 - */ 2617 - if (PageCompound(to_free[0])) 2618 - *npages = 1; 2619 - else if (*npages > 1) 2620 - do_vunmap = true; 2621 - for (i = 0; i < *npages; i++) 2622 - put_page(to_free[i]); 2623 - } 2624 - if (do_vunmap) 2625 - vunmap(ptr); 2626 - kvfree(*pages); 2627 - *pages = NULL; 2628 - *npages = 0; 2629 - } 2630 - 2631 - static void io_pages_free(struct page ***pages, int npages) 2632 - { 2633 - struct page **page_array = *pages; 2634 - 2635 - if (!page_array) 2636 - return; 2637 - 2638 - unpin_user_pages(page_array, npages); 2639 - kvfree(page_array); 2640 - *pages = NULL; 2641 - } 2642 - 2643 - struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages) 2644 - { 2645 - unsigned long start, end, nr_pages; 2646 - struct page **pages; 2647 - int ret; 2648 - 2649 - end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 2650 - start = uaddr >> PAGE_SHIFT; 2651 - nr_pages = end - start; 2652 - if (WARN_ON_ONCE(!nr_pages)) 2653 - return ERR_PTR(-EINVAL); 2654 - 2655 - pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); 2656 - if (!pages) 2657 - return ERR_PTR(-ENOMEM); 2658 - 2659 - ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM, 2660 - pages); 2661 - /* success, mapped all pages */ 2662 - if (ret == nr_pages) { 2663 - 
*npages = nr_pages; 2664 - return pages; 2665 - } 2666 - 2667 - /* partial map, or didn't map anything */ 2668 - if (ret >= 0) { 2669 - /* if we did partial map, release any pages we did get */ 2670 - if (ret) 2671 - unpin_user_pages(pages, ret); 2672 - ret = -EFAULT; 2673 - } 2674 - kvfree(pages); 2675 - return ERR_PTR(ret); 2676 - } 2677 - 2678 - static void *__io_uaddr_map(struct page ***pages, unsigned short *npages, 2679 - unsigned long uaddr, size_t size) 2680 - { 2681 - struct page **page_array; 2682 - unsigned int nr_pages; 2683 - void *page_addr; 2684 - 2685 - *npages = 0; 2686 - 2687 - if (uaddr & (PAGE_SIZE - 1) || !size) 2688 - return ERR_PTR(-EINVAL); 2689 - 2690 - nr_pages = 0; 2691 - page_array = io_pin_pages(uaddr, size, &nr_pages); 2692 - if (IS_ERR(page_array)) 2693 - return page_array; 2694 - 2695 - page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL); 2696 - if (page_addr) { 2697 - *pages = page_array; 2698 - *npages = nr_pages; 2699 - return page_addr; 2700 - } 2701 - 2702 - io_pages_free(&page_array, nr_pages); 2703 - return ERR_PTR(-ENOMEM); 2704 - } 2705 - 2706 2600 static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr, 2707 2601 size_t size) 2708 2602 { ··· 2630 2734 2631 2735 ctx->rings = NULL; 2632 2736 ctx->sq_sqes = NULL; 2633 - } 2634 - 2635 - static void *io_mem_alloc_compound(struct page **pages, int nr_pages, 2636 - size_t size, gfp_t gfp) 2637 - { 2638 - struct page *page; 2639 - int i, order; 2640 - 2641 - order = get_order(size); 2642 - if (order > MAX_PAGE_ORDER) 2643 - return ERR_PTR(-ENOMEM); 2644 - else if (order) 2645 - gfp |= __GFP_COMP; 2646 - 2647 - page = alloc_pages(gfp, order); 2648 - if (!page) 2649 - return ERR_PTR(-ENOMEM); 2650 - 2651 - for (i = 0; i < nr_pages; i++) 2652 - pages[i] = page + i; 2653 - 2654 - return page_address(page); 2655 - } 2656 - 2657 - static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size, 2658 - gfp_t gfp) 2659 - { 2660 - void *ret; 2661 - int 
i; 2662 - 2663 - for (i = 0; i < nr_pages; i++) { 2664 - pages[i] = alloc_page(gfp); 2665 - if (!pages[i]) 2666 - goto err; 2667 - } 2668 - 2669 - ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); 2670 - if (ret) 2671 - return ret; 2672 - err: 2673 - while (i--) 2674 - put_page(pages[i]); 2675 - return ERR_PTR(-ENOMEM); 2676 - } 2677 - 2678 - void *io_pages_map(struct page ***out_pages, unsigned short *npages, 2679 - size_t size) 2680 - { 2681 - gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN; 2682 - struct page **pages; 2683 - int nr_pages; 2684 - void *ret; 2685 - 2686 - nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 2687 - pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp); 2688 - if (!pages) 2689 - return ERR_PTR(-ENOMEM); 2690 - 2691 - ret = io_mem_alloc_compound(pages, nr_pages, size, gfp); 2692 - if (!IS_ERR(ret)) 2693 - goto done; 2694 - 2695 - ret = io_mem_alloc_single(pages, nr_pages, size, gfp); 2696 - if (!IS_ERR(ret)) { 2697 - done: 2698 - *out_pages = pages; 2699 - *npages = nr_pages; 2700 - return ret; 2701 - } 2702 - 2703 - kvfree(pages); 2704 - *out_pages = NULL; 2705 - *npages = 0; 2706 - return ret; 2707 2737 } 2708 2738 2709 2739 static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries, ··· 3192 3370 io_uring_cancel_generic(cancel_all, NULL); 3193 3371 } 3194 3372 3195 - static void *io_uring_validate_mmap_request(struct file *file, 3196 - loff_t pgoff, size_t sz) 3197 - { 3198 - struct io_ring_ctx *ctx = file->private_data; 3199 - loff_t offset = pgoff << PAGE_SHIFT; 3200 - 3201 - switch ((pgoff << PAGE_SHIFT) & IORING_OFF_MMAP_MASK) { 3202 - case IORING_OFF_SQ_RING: 3203 - case IORING_OFF_CQ_RING: 3204 - /* Don't allow mmap if the ring was setup without it */ 3205 - if (ctx->flags & IORING_SETUP_NO_MMAP) 3206 - return ERR_PTR(-EINVAL); 3207 - return ctx->rings; 3208 - case IORING_OFF_SQES: 3209 - /* Don't allow mmap if the ring was setup without it */ 3210 - if (ctx->flags & IORING_SETUP_NO_MMAP) 
3211 - return ERR_PTR(-EINVAL); 3212 - return ctx->sq_sqes; 3213 - case IORING_OFF_PBUF_RING: { 3214 - struct io_buffer_list *bl; 3215 - unsigned int bgid; 3216 - void *ptr; 3217 - 3218 - bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; 3219 - bl = io_pbuf_get_bl(ctx, bgid); 3220 - if (IS_ERR(bl)) 3221 - return bl; 3222 - ptr = bl->buf_ring; 3223 - io_put_bl(ctx, bl); 3224 - return ptr; 3225 - } 3226 - } 3227 - 3228 - return ERR_PTR(-EINVAL); 3229 - } 3230 - 3231 - int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma, 3232 - struct page **pages, int npages) 3233 - { 3234 - unsigned long nr_pages = npages; 3235 - 3236 - vm_flags_set(vma, VM_DONTEXPAND); 3237 - return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages); 3238 - } 3239 - 3240 - #ifdef CONFIG_MMU 3241 - 3242 - static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma) 3243 - { 3244 - struct io_ring_ctx *ctx = file->private_data; 3245 - size_t sz = vma->vm_end - vma->vm_start; 3246 - long offset = vma->vm_pgoff << PAGE_SHIFT; 3247 - void *ptr; 3248 - 3249 - ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz); 3250 - if (IS_ERR(ptr)) 3251 - return PTR_ERR(ptr); 3252 - 3253 - switch (offset & IORING_OFF_MMAP_MASK) { 3254 - case IORING_OFF_SQ_RING: 3255 - case IORING_OFF_CQ_RING: 3256 - return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, 3257 - ctx->n_ring_pages); 3258 - case IORING_OFF_SQES: 3259 - return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages, 3260 - ctx->n_sqe_pages); 3261 - case IORING_OFF_PBUF_RING: 3262 - return io_pbuf_mmap(file, vma); 3263 - } 3264 - 3265 - return -EINVAL; 3266 - } 3267 - 3268 - static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp, 3269 - unsigned long addr, unsigned long len, 3270 - unsigned long pgoff, unsigned long flags) 3271 - { 3272 - void *ptr; 3273 - 3274 - /* 3275 - * Do not allow to map to user-provided address to avoid breaking the 3276 - * aliasing rules. 
Userspace is not able to guess the offset address of 3277 - * kernel kmalloc()ed memory area. 3278 - */ 3279 - if (addr) 3280 - return -EINVAL; 3281 - 3282 - ptr = io_uring_validate_mmap_request(filp, pgoff, len); 3283 - if (IS_ERR(ptr)) 3284 - return -ENOMEM; 3285 - 3286 - /* 3287 - * Some architectures have strong cache aliasing requirements. 3288 - * For such architectures we need a coherent mapping which aliases 3289 - * kernel memory *and* userspace memory. To achieve that: 3290 - * - use a NULL file pointer to reference physical memory, and 3291 - * - use the kernel virtual address of the shared io_uring context 3292 - * (instead of the userspace-provided address, which has to be 0UL 3293 - * anyway). 3294 - * - use the same pgoff which the get_unmapped_area() uses to 3295 - * calculate the page colouring. 3296 - * For architectures without such aliasing requirements, the 3297 - * architecture will return any suitable mapping because addr is 0. 3298 - */ 3299 - filp = NULL; 3300 - flags |= MAP_SHARED; 3301 - pgoff = 0; /* has been translated to ptr above */ 3302 - #ifdef SHM_COLOUR 3303 - addr = (uintptr_t) ptr; 3304 - pgoff = addr >> PAGE_SHIFT; 3305 - #else 3306 - addr = 0UL; 3307 - #endif 3308 - return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); 3309 - } 3310 - 3311 - #else /* !CONFIG_MMU */ 3312 - 3313 - static int io_uring_mmap(struct file *file, struct vm_area_struct *vma) 3314 - { 3315 - return is_nommu_shared_mapping(vma->vm_flags) ? 
0 : -EINVAL; 3316 - } 3317 - 3318 - static unsigned int io_uring_nommu_mmap_capabilities(struct file *file) 3319 - { 3320 - return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE; 3321 - } 3322 - 3323 - static unsigned long io_uring_nommu_get_unmapped_area(struct file *file, 3324 - unsigned long addr, unsigned long len, 3325 - unsigned long pgoff, unsigned long flags) 3326 - { 3327 - void *ptr; 3328 - 3329 - ptr = io_uring_validate_mmap_request(file, pgoff, len); 3330 - if (IS_ERR(ptr)) 3331 - return PTR_ERR(ptr); 3332 - 3333 - return (unsigned long) ptr; 3334 - } 3335 - 3336 - #endif /* !CONFIG_MMU */ 3337 - 3338 3373 static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t argsz) 3339 3374 { 3340 3375 if (flags & IORING_ENTER_EXT_ARG) { ··· 3374 3695 static const struct file_operations io_uring_fops = { 3375 3696 .release = io_uring_release, 3376 3697 .mmap = io_uring_mmap, 3698 + .get_unmapped_area = io_uring_get_unmapped_area, 3377 3699 #ifndef CONFIG_MMU 3378 - .get_unmapped_area = io_uring_nommu_get_unmapped_area, 3379 3700 .mmap_capabilities = io_uring_nommu_mmap_capabilities, 3380 - #else 3381 - .get_unmapped_area = io_uring_mmu_get_unmapped_area, 3382 3701 #endif 3383 3702 .poll = io_uring_poll, 3384 3703 #ifdef CONFIG_PROC_FS
-9
io_uring/io_uring.h
··· 69 69 bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags); 70 70 void __io_commit_cqring_flush(struct io_ring_ctx *ctx); 71 71 72 - struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages); 73 - int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma, 74 - struct page **pages, int npages); 75 - 76 72 struct file *io_file_get_normal(struct io_kiocb *req, int fd); 77 73 struct file *io_file_get_fixed(struct io_kiocb *req, int fd, 78 74 unsigned issue_flags); ··· 104 108 105 109 bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, 106 110 bool cancel_all); 107 - 108 - void *io_pages_map(struct page ***out_pages, unsigned short *npages, 109 - size_t size); 110 - void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages, 111 - bool put_pages); 112 111 113 112 enum { 114 113 IO_EVENTFD_OP_SIGNAL_BIT,
+1
io_uring/kbuf.c
··· 15 15 #include "io_uring.h" 16 16 #include "opdef.h" 17 17 #include "kbuf.h" 18 + #include "memmap.h" 18 19 19 20 #define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf)) 20 21
+336
io_uring/memmap.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/kernel.h> 3 + #include <linux/init.h> 4 + #include <linux/errno.h> 5 + #include <linux/mm.h> 6 + #include <linux/mman.h> 7 + #include <linux/slab.h> 8 + #include <linux/vmalloc.h> 9 + #include <linux/io_uring.h> 10 + #include <linux/io_uring_types.h> 11 + #include <asm/shmparam.h> 12 + 13 + #include "memmap.h" 14 + #include "kbuf.h" 15 + 16 + static void *io_mem_alloc_compound(struct page **pages, int nr_pages, 17 + size_t size, gfp_t gfp) 18 + { 19 + struct page *page; 20 + int i, order; 21 + 22 + order = get_order(size); 23 + if (order > MAX_PAGE_ORDER) 24 + return ERR_PTR(-ENOMEM); 25 + else if (order) 26 + gfp |= __GFP_COMP; 27 + 28 + page = alloc_pages(gfp, order); 29 + if (!page) 30 + return ERR_PTR(-ENOMEM); 31 + 32 + for (i = 0; i < nr_pages; i++) 33 + pages[i] = page + i; 34 + 35 + return page_address(page); 36 + } 37 + 38 + static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size, 39 + gfp_t gfp) 40 + { 41 + void *ret; 42 + int i; 43 + 44 + for (i = 0; i < nr_pages; i++) { 45 + pages[i] = alloc_page(gfp); 46 + if (!pages[i]) 47 + goto err; 48 + } 49 + 50 + ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); 51 + if (ret) 52 + return ret; 53 + err: 54 + while (i--) 55 + put_page(pages[i]); 56 + return ERR_PTR(-ENOMEM); 57 + } 58 + 59 + void *io_pages_map(struct page ***out_pages, unsigned short *npages, 60 + size_t size) 61 + { 62 + gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN; 63 + struct page **pages; 64 + int nr_pages; 65 + void *ret; 66 + 67 + nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 68 + pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp); 69 + if (!pages) 70 + return ERR_PTR(-ENOMEM); 71 + 72 + ret = io_mem_alloc_compound(pages, nr_pages, size, gfp); 73 + if (!IS_ERR(ret)) 74 + goto done; 75 + 76 + ret = io_mem_alloc_single(pages, nr_pages, size, gfp); 77 + if (!IS_ERR(ret)) { 78 + done: 79 + *out_pages = pages; 80 + *npages = nr_pages; 
81 + return ret; 82 + } 83 + 84 + kvfree(pages); 85 + *out_pages = NULL; 86 + *npages = 0; 87 + return ret; 88 + } 89 + 90 + void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages, 91 + bool put_pages) 92 + { 93 + bool do_vunmap = false; 94 + 95 + if (!ptr) 96 + return; 97 + 98 + if (put_pages && *npages) { 99 + struct page **to_free = *pages; 100 + int i; 101 + 102 + /* 103 + * Only did vmap for the non-compound multiple page case. 104 + * For the compound page, we just need to put the head. 105 + */ 106 + if (PageCompound(to_free[0])) 107 + *npages = 1; 108 + else if (*npages > 1) 109 + do_vunmap = true; 110 + for (i = 0; i < *npages; i++) 111 + put_page(to_free[i]); 112 + } 113 + if (do_vunmap) 114 + vunmap(ptr); 115 + kvfree(*pages); 116 + *pages = NULL; 117 + *npages = 0; 118 + } 119 + 120 + void io_pages_free(struct page ***pages, int npages) 121 + { 122 + struct page **page_array = *pages; 123 + 124 + if (!page_array) 125 + return; 126 + 127 + unpin_user_pages(page_array, npages); 128 + kvfree(page_array); 129 + *pages = NULL; 130 + } 131 + 132 + struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages) 133 + { 134 + unsigned long start, end, nr_pages; 135 + struct page **pages; 136 + int ret; 137 + 138 + end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 139 + start = uaddr >> PAGE_SHIFT; 140 + nr_pages = end - start; 141 + if (WARN_ON_ONCE(!nr_pages)) 142 + return ERR_PTR(-EINVAL); 143 + 144 + pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); 145 + if (!pages) 146 + return ERR_PTR(-ENOMEM); 147 + 148 + ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM, 149 + pages); 150 + /* success, mapped all pages */ 151 + if (ret == nr_pages) { 152 + *npages = nr_pages; 153 + return pages; 154 + } 155 + 156 + /* partial map, or didn't map anything */ 157 + if (ret >= 0) { 158 + /* if we did partial map, release any pages we did get */ 159 + if (ret) 160 + unpin_user_pages(pages, ret); 161 
+ ret = -EFAULT; 162 + } 163 + kvfree(pages); 164 + return ERR_PTR(ret); 165 + } 166 + 167 + void *__io_uaddr_map(struct page ***pages, unsigned short *npages, 168 + unsigned long uaddr, size_t size) 169 + { 170 + struct page **page_array; 171 + unsigned int nr_pages; 172 + void *page_addr; 173 + 174 + *npages = 0; 175 + 176 + if (uaddr & (PAGE_SIZE - 1) || !size) 177 + return ERR_PTR(-EINVAL); 178 + 179 + nr_pages = 0; 180 + page_array = io_pin_pages(uaddr, size, &nr_pages); 181 + if (IS_ERR(page_array)) 182 + return page_array; 183 + 184 + page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL); 185 + if (page_addr) { 186 + *pages = page_array; 187 + *npages = nr_pages; 188 + return page_addr; 189 + } 190 + 191 + io_pages_free(&page_array, nr_pages); 192 + return ERR_PTR(-ENOMEM); 193 + } 194 + 195 + static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff, 196 + size_t sz) 197 + { 198 + struct io_ring_ctx *ctx = file->private_data; 199 + loff_t offset = pgoff << PAGE_SHIFT; 200 + 201 + switch ((pgoff << PAGE_SHIFT) & IORING_OFF_MMAP_MASK) { 202 + case IORING_OFF_SQ_RING: 203 + case IORING_OFF_CQ_RING: 204 + /* Don't allow mmap if the ring was setup without it */ 205 + if (ctx->flags & IORING_SETUP_NO_MMAP) 206 + return ERR_PTR(-EINVAL); 207 + return ctx->rings; 208 + case IORING_OFF_SQES: 209 + /* Don't allow mmap if the ring was setup without it */ 210 + if (ctx->flags & IORING_SETUP_NO_MMAP) 211 + return ERR_PTR(-EINVAL); 212 + return ctx->sq_sqes; 213 + case IORING_OFF_PBUF_RING: { 214 + struct io_buffer_list *bl; 215 + unsigned int bgid; 216 + void *ptr; 217 + 218 + bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; 219 + bl = io_pbuf_get_bl(ctx, bgid); 220 + if (IS_ERR(bl)) 221 + return bl; 222 + ptr = bl->buf_ring; 223 + io_put_bl(ctx, bl); 224 + return ptr; 225 + } 226 + } 227 + 228 + return ERR_PTR(-EINVAL); 229 + } 230 + 231 + int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma, 232 + struct 
page **pages, int npages) 233 + { 234 + unsigned long nr_pages = npages; 235 + 236 + vm_flags_set(vma, VM_DONTEXPAND); 237 + return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages); 238 + } 239 + 240 + #ifdef CONFIG_MMU 241 + 242 + __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma) 243 + { 244 + struct io_ring_ctx *ctx = file->private_data; 245 + size_t sz = vma->vm_end - vma->vm_start; 246 + long offset = vma->vm_pgoff << PAGE_SHIFT; 247 + void *ptr; 248 + 249 + ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz); 250 + if (IS_ERR(ptr)) 251 + return PTR_ERR(ptr); 252 + 253 + switch (offset & IORING_OFF_MMAP_MASK) { 254 + case IORING_OFF_SQ_RING: 255 + case IORING_OFF_CQ_RING: 256 + return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, 257 + ctx->n_ring_pages); 258 + case IORING_OFF_SQES: 259 + return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages, 260 + ctx->n_sqe_pages); 261 + case IORING_OFF_PBUF_RING: 262 + return io_pbuf_mmap(file, vma); 263 + } 264 + 265 + return -EINVAL; 266 + } 267 + 268 + unsigned long io_uring_get_unmapped_area(struct file *filp, unsigned long addr, 269 + unsigned long len, unsigned long pgoff, 270 + unsigned long flags) 271 + { 272 + void *ptr; 273 + 274 + /* 275 + * Do not allow to map to user-provided address to avoid breaking the 276 + * aliasing rules. Userspace is not able to guess the offset address of 277 + * kernel kmalloc()ed memory area. 278 + */ 279 + if (addr) 280 + return -EINVAL; 281 + 282 + ptr = io_uring_validate_mmap_request(filp, pgoff, len); 283 + if (IS_ERR(ptr)) 284 + return -ENOMEM; 285 + 286 + /* 287 + * Some architectures have strong cache aliasing requirements. 288 + * For such architectures we need a coherent mapping which aliases 289 + * kernel memory *and* userspace memory. 
To achieve that: 290 + * - use a NULL file pointer to reference physical memory, and 291 + * - use the kernel virtual address of the shared io_uring context 292 + * (instead of the userspace-provided address, which has to be 0UL 293 + * anyway). 294 + * - use the same pgoff which the get_unmapped_area() uses to 295 + * calculate the page colouring. 296 + * For architectures without such aliasing requirements, the 297 + * architecture will return any suitable mapping because addr is 0. 298 + */ 299 + filp = NULL; 300 + flags |= MAP_SHARED; 301 + pgoff = 0; /* has been translated to ptr above */ 302 + #ifdef SHM_COLOUR 303 + addr = (uintptr_t) ptr; 304 + pgoff = addr >> PAGE_SHIFT; 305 + #else 306 + addr = 0UL; 307 + #endif 308 + return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); 309 + } 310 + 311 + #else /* !CONFIG_MMU */ 312 + 313 + int io_uring_mmap(struct file *file, struct vm_area_struct *vma) 314 + { 315 + return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -EINVAL; 316 + } 317 + 318 + unsigned int io_uring_nommu_mmap_capabilities(struct file *file) 319 + { 320 + return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE; 321 + } 322 + 323 + unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr, 324 + unsigned long len, unsigned long pgoff, 325 + unsigned long flags) 326 + { 327 + void *ptr; 328 + 329 + ptr = io_uring_validate_mmap_request(file, pgoff, len); 330 + if (IS_ERR(ptr)) 331 + return PTR_ERR(ptr); 332 + 333 + return (unsigned long) ptr; 334 + } 335 + 336 + #endif /* !CONFIG_MMU */
+25
io_uring/memmap.h
··· 1 + #ifndef IO_URING_MEMMAP_H 2 + #define IO_URING_MEMMAP_H 3 + 4 + struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages); 5 + void io_pages_free(struct page ***pages, int npages); 6 + int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma, 7 + struct page **pages, int npages); 8 + 9 + void *io_pages_map(struct page ***out_pages, unsigned short *npages, 10 + size_t size); 11 + void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages, 12 + bool put_pages); 13 + 14 + void *__io_uaddr_map(struct page ***pages, unsigned short *npages, 15 + unsigned long uaddr, size_t size); 16 + 17 + #ifndef CONFIG_MMU 18 + unsigned int io_uring_nommu_mmap_capabilities(struct file *file); 19 + #endif 20 + unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr, 21 + unsigned long len, unsigned long pgoff, 22 + unsigned long flags); 23 + int io_uring_mmap(struct file *file, struct vm_area_struct *vma); 24 + 25 + #endif
+1
io_uring/rsrc.c
··· 16 16 #include "alloc_cache.h" 17 17 #include "openclose.h" 18 18 #include "rsrc.h" 19 + #include "memmap.h" 19 20 20 21 struct io_rsrc_update { 21 22 struct file *file;