Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-6.15/io_uring-epoll-wait' into for-6.15/io_uring-reg-vec

* for-6.15/io_uring-epoll-wait:
io_uring/epoll: add support for IORING_OP_EPOLL_WAIT
io_uring/epoll: remove CONFIG_EPOLL guards
eventpoll: add epoll_sendevents() helper
eventpoll: abstract out ep_try_send_events() helper
eventpoll: abstract out parameter sanity checking

+122 -30
+63 -24
fs/eventpoll.c
··· 1980 1980 return ret; 1981 1981 } 1982 1982 1983 + static int ep_try_send_events(struct eventpoll *ep, 1984 + struct epoll_event __user *events, int maxevents) 1985 + { 1986 + int res; 1987 + 1988 + /* 1989 + * Try to transfer events to user space. In case we get 0 events and 1990 + * there's still timeout left over, we go trying again in search of 1991 + * more luck. 1992 + */ 1993 + res = ep_send_events(ep, events, maxevents); 1994 + if (res > 0) 1995 + ep_suspend_napi_irqs(ep); 1996 + return res; 1997 + } 1998 + 1983 1999 /** 1984 2000 * ep_poll - Retrieves ready events, and delivers them to the caller-supplied 1985 2001 * event buffer. ··· 2047 2031 2048 2032 while (1) { 2049 2033 if (eavail) { 2050 - /* 2051 - * Try to transfer events to user space. In case we get 2052 - * 0 events and there's still timeout left over, we go 2053 - * trying again in search of more luck. 2054 - */ 2055 - res = ep_send_events(ep, events, maxevents); 2056 - if (res) { 2057 - if (res > 0) 2058 - ep_suspend_napi_irqs(ep); 2034 + res = ep_try_send_events(ep, events, maxevents); 2035 + if (res) 2059 2036 return res; 2060 - } 2061 2037 } 2062 2038 2063 2039 if (timed_out) ··· 2453 2445 return do_epoll_ctl(epfd, op, fd, &epds, false); 2454 2446 } 2455 2447 2448 + static int ep_check_params(struct file *file, struct epoll_event __user *evs, 2449 + int maxevents) 2450 + { 2451 + /* The maximum number of event must be greater than zero */ 2452 + if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) 2453 + return -EINVAL; 2454 + 2455 + /* Verify that the area passed by the user is writeable */ 2456 + if (!access_ok(evs, maxevents * sizeof(struct epoll_event))) 2457 + return -EFAULT; 2458 + 2459 + /* 2460 + * We have to check that the file structure underneath the fd 2461 + * the user passed to us _is_ an eventpoll file. 2462 + */ 2463 + if (!is_file_epoll(file)) 2464 + return -EINVAL; 2465 + 2466 + return 0; 2467 + } 2468 + 2469 + int epoll_sendevents(struct file *file, struct epoll_event __user *events, 2470 + int maxevents) 2471 + { 2472 + struct eventpoll *ep; 2473 + int ret; 2474 + 2475 + ret = ep_check_params(file, events, maxevents); 2476 + if (unlikely(ret)) 2477 + return ret; 2478 + 2479 + ep = file->private_data; 2480 + /* 2481 + * Racy call, but that's ok - it should get retried based on 2482 + * poll readiness anyway. 2483 + */ 2484 + if (ep_events_available(ep)) 2485 + return ep_try_send_events(ep, events, maxevents); 2486 + return 0; 2487 + } 2488 + 2456 2489 /* 2457 2490 * Implement the event wait interface for the eventpoll file. It is the kernel 2458 2491 * part of the user space epoll_wait(2). ··· 2502 2453 int maxevents, struct timespec64 *to) 2503 2454 { 2504 2455 struct eventpoll *ep; 2505 - 2506 - /* The maximum number of event must be greater than zero */ 2507 - if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) 2508 - return -EINVAL; 2509 - 2510 - /* Verify that the area passed by the user is writeable */ 2511 - if (!access_ok(events, maxevents * sizeof(struct epoll_event))) 2512 - return -EFAULT; 2456 + int ret; 2513 2457 2514 2458 /* Get the "struct file *" for the eventpoll file */ 2515 2459 CLASS(fd, f)(epfd); 2516 2460 if (fd_empty(f)) 2517 2461 return -EBADF; 2518 2462 2519 - /* 2520 - * We have to check that the file structure underneath the fd 2521 - * the user passed to us _is_ an eventpoll file. 2522 - */ 2523 - if (!is_file_epoll(fd_file(f))) 2524 - return -EINVAL; 2463 + ret = ep_check_params(fd_file(f), events, maxevents); 2464 + if (unlikely(ret)) 2465 + return ret; 2525 2466 2526 2467 /* 2527 2468 * At this point it is safe to assume that the "private_data" contains
+4
include/linux/eventpoll.h
··· 25 25 /* Used to release the epoll bits inside the "struct file" */ 26 26 void eventpoll_release_file(struct file *file); 27 27 28 + /* Copy ready events to userspace */ 29 + int epoll_sendevents(struct file *file, struct epoll_event __user *events, 30 + int maxevents); 31 + 28 32 /* 29 33 * This is called from inside fs/file_table.c:__fput() to unlink files 30 34 * from the eventpoll interface. We need to have this facility to cleanup
+1
include/uapi/linux/io_uring.h
··· 280 280 IORING_OP_BIND, 281 281 IORING_OP_LISTEN, 282 282 IORING_OP_RECV_ZC, 283 + IORING_OP_EPOLL_WAIT, 283 284 284 285 /* this goes last, obviously */ 285 286 IORING_OP_LAST,
+5 -4
io_uring/Makefile
··· 11 11 eventfd.o uring_cmd.o openclose.o \ 12 12 sqpoll.o xattr.o nop.o fs.o splice.o \ 13 13 sync.o msg_ring.o advise.o openclose.o \ 14 - epoll.o statx.o timeout.o fdinfo.o \ 15 - cancel.o waitid.o register.o \ 16 - truncate.o memmap.o alloc_cache.o 14 + statx.o timeout.o fdinfo.o cancel.o \ 15 + waitid.o register.o truncate.o \ 16 + memmap.o alloc_cache.o 17 17 obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o 18 18 obj-$(CONFIG_IO_WQ) += io-wq.o 19 19 obj-$(CONFIG_FUTEX) += futex.o 20 - obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o 20 + obj-$(CONFIG_EPOLL) += epoll.o 21 + obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
+33 -2
io_uring/epoll.c
··· 12 12 #include "io_uring.h" 13 13 #include "epoll.h" 14 14 15 - #if defined(CONFIG_EPOLL) 16 15 struct io_epoll { 17 16 struct file *file; 18 17 int epfd; 19 18 int op; 20 19 int fd; 21 20 struct epoll_event event; 21 + }; 22 + 23 + struct io_epoll_wait { 24 + struct file *file; 25 + int maxevents; 26 + struct epoll_event __user *events; 22 27 }; 23 28 24 29 int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ··· 63 58 io_req_set_res(req, ret, 0); 64 59 return IOU_OK; 65 60 } 66 - #endif 61 + 62 + int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 63 + { 64 + struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait); 65 + 66 + if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) 67 + return -EINVAL; 68 + 69 + iew->maxevents = READ_ONCE(sqe->len); 70 + iew->events = u64_to_user_ptr(READ_ONCE(sqe->addr)); 71 + return 0; 72 + } 73 + 74 + int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags) 75 + { 76 + struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait); 77 + int ret; 78 + 79 + ret = epoll_sendevents(req->file, iew->events, iew->maxevents); 80 + if (ret == 0) 81 + return -EAGAIN; 82 + if (ret < 0) 83 + req_set_fail(req); 84 + 85 + io_req_set_res(req, ret, 0); 86 + return IOU_OK; 87 + }
+2
io_uring/epoll.h
··· 3 3 #if defined(CONFIG_EPOLL) 4 4 int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); 5 5 int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags); 6 + int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); 7 + int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags); 6 8 #endif
+14
io_uring/opdef.c
··· 529 529 .prep = io_eopnotsupp_prep, 530 530 #endif 531 531 }, 532 + [IORING_OP_EPOLL_WAIT] = { 533 + .needs_file = 1, 534 + .audit_skip = 1, 535 + .pollin = 1, 536 + #if defined(CONFIG_EPOLL) 537 + .prep = io_epoll_wait_prep, 538 + .issue = io_epoll_wait, 539 + #else 540 + .prep = io_eopnotsupp_prep, 541 + #endif 542 + }, 532 543 }; 533 544 534 545 const struct io_cold_def io_cold_defs[] = { ··· 771 760 }, 772 761 [IORING_OP_RECV_ZC] = { 773 762 .name = "RECV_ZC", 763 + }, 764 + [IORING_OP_EPOLL_WAIT] = { 765 + .name = "EPOLL_WAIT", 774 766 }, 775 767 }; 776 768