Merge pull request #118 from avsm/update-to-2.7 · anil.recoil.org/unpac-unpac@798c568

+1 -1

lib/uring/dune

··· 57 57 "%{ocaml-config:ocamlc_cflags}" 58 58 (run make -j -C src)))) 59 59 (copy %{project_root}/vendor/liburing/src/liburing.a liburing.a) 60 - (copy %{project_root}/vendor/liburing/src/liburing.so.2.4 dlluring.so) 60 + (copy %{project_root}/vendor/liburing/src/liburing.so.2.7 dlluring.so) 61 61 (copy %{project_root}/vendor/liburing/src/include/liburing.h liburing.h) 62 62 (copy 63 63 %{project_root}/vendor/liburing/src/include/liburing/io_uring.h

+12

lib/uring/include/discover.ml

··· 97 97 "IORING_OP_GETXATTR"; 98 98 "IORING_OP_SOCKET"; 99 99 "IORING_OP_URING_CMD"; 100 + "IORING_OP_READ_MULTISHOT"; 101 + "IORING_OP_WAITID"; 102 + "IORING_OP_FUTEX_WAIT"; 103 + "IORING_OP_FUTEX_WAKE"; 104 + "IORING_OP_FUTEX_WAITV"; 105 + "IORING_OP_FIXED_FD_INSTALL"; 106 + "IORING_OP_FTRUNCATE"; 107 + "IORING_OP_BIND"; 108 + "IORING_OP_LISTEN"; 100 109 ] 101 110 102 111 let uring_setup_flags = [ ··· 114 123 "IORING_SETUP_CQE32"; 115 124 "IORING_SETUP_SINGLE_ISSUER"; 116 125 "IORING_SETUP_DEFER_TASKRUN"; 126 + "IORING_SETUP_NO_MMAP"; 127 + "IORING_SETUP_REGISTERED_FD_ONLY"; 128 + "IORING_SETUP_NO_SQARRAY"; 117 129 ] 118 130 119 131 let uring_defs c =

+9

lib/uring/uring.ml

··· 330 330 external submit_close : t -> Unix.file_descr -> id -> bool = "ocaml_uring_submit_close" [@@noalloc] 331 331 external submit_statx : t -> id -> Unix.file_descr -> Statx.t -> Sketch.ptr -> int -> int -> bool = "ocaml_uring_submit_statx_byte" "ocaml_uring_submit_statx_native" [@@noalloc] 332 332 external submit_splice : t -> id -> Unix.file_descr -> Unix.file_descr -> int -> bool = "ocaml_uring_submit_splice" [@@noalloc] 333 + external submit_bind : t -> id -> Unix.file_descr -> Sockaddr.t -> bool = "ocaml_uring_submit_bind" [@@noalloc] 334 + external submit_listen : t -> id -> Unix.file_descr -> int -> bool = "ocaml_uring_submit_listen" [@@noalloc] 333 335 external submit_connect : t -> id -> Unix.file_descr -> Sockaddr.t -> bool = "ocaml_uring_submit_connect" [@@noalloc] 334 336 external submit_accept : t -> id -> Unix.file_descr -> Sockaddr.t -> bool = "ocaml_uring_submit_accept" [@@noalloc] 335 337 external submit_cancel : t -> id -> id -> bool = "ocaml_uring_submit_cancel" [@@noalloc] ··· 545 547 546 548 let splice t ~src ~dst ~len user_data = 547 549 with_id t (fun id -> Uring.submit_splice t.uring id src dst len) user_data 550 + 551 + let bind t fd addr user_data = 552 + let addr = Sockaddr.of_unix addr in 553 + with_id_full t (fun id -> Uring.submit_bind t.uring id fd addr) user_data ~extra_data:addr 554 + 555 + let listen t fd backlog user_data = 556 + with_id t (fun id -> Uring.submit_listen t.uring id fd backlog) user_data 548 557 549 558 let connect t fd addr user_data = 550 559 let addr = Sockaddr.of_unix addr in

+6

lib/uring/uring.mli

··· 595 595 (** [statx t ?fd ~mask path stat flags] stats [path], which is resolved relative to [fd] 596 596 (or the current directory if [fd] is not given). *) 597 597 598 + val bind : 'a t -> Unix.file_descr -> Unix.sockaddr -> 'a -> 'a job option 599 + (** [bind t fd addr d] will submit a request to bind [fd] to [addr]. *) 600 + 601 + val listen : 'a t -> Unix.file_descr -> int -> 'a -> 'a job option 602 + (** [listen t fd backlog d] will submit a request to listen on [fd] with [backlog] maximum pending connections. *) 603 + 598 604 val connect : 'a t -> Unix.file_descr -> Unix.sockaddr -> 'a -> 'a job option 599 605 (** [connect t fd addr d] will submit a request to connect [fd] to [addr]. *) 600 606

+24

lib/uring/uring_stubs.c

··· 778 778 779 779 // v_sockaddr must not be GC'd while the call is in progress 780 780 value /* noalloc */ 781 + ocaml_uring_submit_bind(value v_uring, value v_id, value v_fd, value v_sockaddr) { 782 + struct io_uring *ring = Ring_val(v_uring); 783 + struct io_uring_sqe *sqe; 784 + struct sock_addr_data *addr = Sock_addr_val(v_sockaddr); 785 + sqe = io_uring_get_sqe(ring); 786 + if (!sqe) return (Val_false); 787 + io_uring_prep_bind(sqe, Int_val(v_fd), &(addr->sock_addr_addr.s_gen), addr->sock_addr_len); 788 + io_uring_sqe_set_data(sqe, (void *)Long_val(v_id)); 789 + return (Val_true); 790 + } 791 + 792 + value /* noalloc */ 793 + ocaml_uring_submit_listen(value v_uring, value v_id, value v_fd, value v_backlog) { 794 + struct io_uring *ring = Ring_val(v_uring); 795 + struct io_uring_sqe *sqe; 796 + sqe = io_uring_get_sqe(ring); 797 + if (!sqe) return (Val_false); 798 + io_uring_prep_listen(sqe, Int_val(v_fd), Int_val(v_backlog)); 799 + io_uring_sqe_set_data(sqe, (void *)Long_val(v_id)); 800 + return (Val_true); 801 + } 802 + 803 + // v_sockaddr must not be GC'd while the call is in progress 804 + value /* noalloc */ 781 805 ocaml_uring_submit_connect(value v_uring, value v_id, value v_fd, value v_sockaddr) { 782 806 struct io_uring *ring = Ring_val(v_uring); 783 807 struct io_uring_sqe *sqe;

+5

tests/dune

··· 39 39 (modules poll_add) 40 40 (libraries unix uring logs logs.fmt)) 41 41 42 + (executable 43 + (name socket_ops) 44 + (modules socket_ops) 45 + (libraries unix uring)) 46 + 42 47 (rule 43 48 (alias runtest) 44 49 (package uring)

+133

tests/socket_ops.ml

(* End-to-end smoke test for the io_uring socket operations exposed by the
   [Uring] module: bind, listen, connect and close are all issued through the
   ring against a loopback TCP socket. *)
open Printf

(* Flush the submission queue and block for the next completion, returning the
   raw kernel result (a negative errno on failure). [what] names the operation
   in the exception raised when no completion is delivered. *)
let await_result t ~what =
  let _submitted = Uring.submit t in
  match Uring.wait t with
  | Uring.None -> failwith (sprintf "No completion for %s" what)
  | Uring.Some { result; data = _ } -> result

(* Human-readable message for a negative completion result. *)
let describe_errno result =
  Unix.error_message (Uring.error_of_errno (-result))

(* Tear everything down after a failed operation and raise [Failure msg].
   The sockets are closed synchronously with [Unix.close] rather than through
   the ring: a close submitted to the ring would still be in flight when
   [Uring.exit] runs, and [Uring.exit] rejects a ring with outstanding
   requests, which would hide the error we actually want to report. *)
let abort t socks msg =
  List.iter Unix.close socks;
  Uring.exit t;
  failwith msg

let () =
  let queue_depth = 128 in
  let t = Uring.create ~queue_depth () in

  (* The sockets are created with Unix.socket; this test only exercises the
     bind/listen/connect/close operations of the ring. *)
  let server_sock = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in
  Unix.set_nonblock server_sock;
  printf "Server socket created\n";

  (* Port 0 asks the kernel to pick a free ephemeral port. *)
  let addr = Unix.ADDR_INET (Unix.inet_addr_loopback, 0) in

  (* Use io_uring for bind operation *)
  let bind_result =
    match Uring.bind t server_sock addr () with
    | None -> failwith "Failed to submit bind operation"
    | Some _job ->
      let result = await_result t ~what:"bind" in
      if result < 0 then
        abort t [server_sock]
          (sprintf "Bind failed: %s" (describe_errno result))
      else
        result
  in
  printf "Bind completed with result: %d\n" bind_result;

  (* Use io_uring for listen operation *)
  let backlog = 10 in
  let listen_result =
    match Uring.listen t server_sock backlog () with
    | None -> failwith "Failed to submit listen operation"
    | Some _job ->
      let result = await_result t ~what:"listen" in
      if result < 0 then
        abort t [server_sock]
          (sprintf "Listen failed: %s" (describe_errno result))
      else
        result
  in
  printf "Listen completed with result: %d\n" listen_result;

  (* Recover the port the kernel actually assigned. *)
  let actual_addr = Unix.getsockname server_sock in
  let port = match actual_addr with
    | Unix.ADDR_INET (_, p) -> p
    | _ -> failwith "Unexpected address type"
  in
  printf "Socket bound and listening on port: %d\n" port;

  (* Test connecting to the bound socket *)
  printf "Testing connection to the bound socket...\n";

  let client_sock = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in
  Unix.set_nonblock client_sock;
  printf "Client socket created\n";

  (* Use io_uring for connect operation *)
  let connect_addr = Unix.ADDR_INET (Unix.inet_addr_loopback, port) in
  let connect_result, connect_in_progress =
    match Uring.connect t client_sock connect_addr () with
    | None -> failwith "Failed to submit connect operation"
    | Some _job ->
      let result = await_result t ~what:"connect" in
      (* A non-blocking connect may legitimately report EINPROGRESS. Compare
         through Unix.error instead of a literal errno value, because the
         numeric value of EINPROGRESS is not the same on every architecture. *)
      let in_progress =
        result < 0 && Uring.error_of_errno (-result) = Unix.EINPROGRESS
      in
      if result < 0 && not in_progress then
        abort t [client_sock; server_sock]
          (sprintf "Connect failed: %s (errno: %d)"
             (describe_errno result) (-result))
      else
        (result, in_progress)
  in

  if connect_result = 0 || connect_in_progress then
    printf "Connect initiated successfully (result: %d)\n" connect_result
  else
    printf "Connect completed with result: %d\n" connect_result;

  (* Report the local port the client ended up on. *)
  let client_addr = Unix.getsockname client_sock in
  let client_port = match client_addr with
    | Unix.ADDR_INET (_, p) -> p
    | _ -> failwith "Unexpected address type"
  in
  printf "Client socket connected from port: %d to port: %d\n" client_port port;

  (* Clean up using io_uring close operations *)
  begin match Uring.close t client_sock () with
    | None -> failwith "Failed to submit close for client socket"
    | Some _ -> ()
  end;

  begin match Uring.close t server_sock () with
    | None -> failwith "Failed to submit close for server socket"
    | Some _ -> ()
  end;

  let _submitted = Uring.submit t in

  (* Reap both close completions before shutting the ring down; a failed
     close is reported but does not fail the test. *)
  let rec wait_closes pending =
    if pending > 0 then
      match Uring.wait t with
      | Uring.None -> failwith "No completion for close"
      | Uring.Some { result; data = _ } ->
        if result < 0 then
          printf "Close warning: %s\n" (describe_errno result);
        wait_closes (pending - 1)
  in
  wait_closes 2;

  Uring.exit t;
  printf "Test completed successfully!\n"

+9 -1

vendor/liburing/.github/workflows/build.yml

··· 50 50 cc: arm-linux-gnueabi-gcc 51 51 cxx: arm-linux-gnueabi-g++ 52 52 53 + # riscv64 54 + - arch: riscv64 55 + cc_pkg: gcc-riscv64-linux-gnu 56 + cxx_pkg: g++-riscv64-linux-gnu 57 + cc: riscv64-linux-gnu-gcc 58 + cxx: riscv64-linux-gnu-g++ 59 + 53 60 # powerpc64 54 61 - arch: powerpc64 55 62 cc_pkg: gcc-powerpc64-linux-gnu ··· 100 107 101 108 steps: 102 109 - name: Checkout source 103 - uses: actions/checkout@v2 110 + uses: actions/checkout@v3 104 111 105 112 - name: Install Compilers 106 113 run: | 107 114 if [[ "${{matrix.cc_pkg}}" == "clang" ]]; then \ 108 115 wget https://apt.llvm.org/llvm.sh -O /tmp/llvm.sh; \ 116 + sudo apt-get purge --auto-remove llvm python3-lldb-14 llvm-14 -y; \ 109 117 sudo bash /tmp/llvm.sh 17; \ 110 118 sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-17 400; \ 111 119 sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-17 400; \

+1 -1

vendor/liburing/.github/workflows/shellcheck.yml

··· 11 11 12 12 steps: 13 13 - name: Checkout source 14 - uses: actions/checkout@v2 14 + uses: actions/checkout@v3 15 15 16 16 - name: Display shellcheck version 17 17 run: shellcheck --version

+3

vendor/liburing/.gitignore

··· 19 19 /examples/io_uring-test 20 20 /examples/io_uring-udp 21 21 /examples/link-cp 22 + /examples/napi-busy-poll-client 23 + /examples/napi-busy-poll-server 22 24 /examples/ucontext-cp 23 25 /examples/poll-bench 26 + /examples/proxy 24 27 /examples/send-zerocopy 25 28 /examples/rsrc-update-bench 26 29

+47

vendor/liburing/CHANGELOG

··· 1 + liburing-2.7 release 2 + 3 + - Man page updates 4 + - Sync with kernel 6.10 5 + - send/recv bundle support 6 + - accept nowait and CQE_F_MORE 7 + - Add and update test cases 8 + - Fix io_uring_queue_init_mem() returning a value that was too small, 9 + potentially causing memory corruption in userspace by overwriting 10 + 64 bytes beyond the returned value. Also add test case for that. 11 + - Add 64-bit length variants of io_uring_prep_{m,f}advise() 12 + - Add BIND/LISTEN support and helpers / man pages 13 + - Add io_uring_enable_rings.3 man page 14 + - Fix bug in io_uring_prep_read_multishot() 15 + - Fixup bundle test cases 16 + - Add fixed-hugepage test case 17 + - Fix io_uring_prep_fixed_fd_install.3 man page 18 + - Note 'len' == 0 requirement in io_uring_prep_send.3 man page 19 + - Fix some test cases for skipping on older kernels 20 + 21 + liburing-2.6 release 22 + 23 + - Add getsockopt and setsockopt socket commands 24 + - Add test cases to test/hardlink 25 + - Man page fixes 26 + - Add futex support, and test cases 27 + - Add waitid support, and test cases 28 + - Add read multishot, and test cases 29 + - Add support for IORING_SETUP_NO_SQARRAY 30 + - Use IORING_SETUP_NO_SQARRAY as the default 31 + - Add support for IORING_OP_FIXED_FD_INSTALL 32 + - Add io_uring_prep_fixed_fd_install() helper 33 + - Support for napi busy polling 34 + - Improve/add test cases 35 + - Man page fixes 36 + - Add sample 'proxy' example 37 + 38 + liburing-2.5 release 39 + 40 + - Add support for io_uring_prep_cmd_sock() 41 + - Add support for application allocated ring memory, for placing rings 42 + in huge mem. Available through io_uring_queue_init_mem(). 43 + - Add support for registered ring fds 44 + - Various documentation updates 45 + - Various fixes 46 + 1 47 liburing-2.4 release 2 48 3 49 - Add io_uring_{major,minor,check}_version() functions. 
··· 15 61 io_uring_prep_socket_direct() factor in being called with 16 62 IORING_FILE_INDEX_ALLOC for allocating a direct descriptor. 17 63 - Add io_uring_prep_sendto() function. 64 + - Add io_uring_prep_cmd_sock() function. 18 65 19 66 liburing-2.3 release 20 67

+15 -4

vendor/liburing/Makefile

··· 11 11 @$(MAKE) -C test 12 12 @$(MAKE) -C examples 13 13 14 - .PHONY: all install default clean test 15 - .PHONY: FORCE cscope 14 + library: 15 + @$(MAKE) -C src 16 16 17 - partcheck: all 18 - @echo "make partcheck => TODO add tests with out kernel support" 17 + .PHONY: all install default clean test library 18 + .PHONY: FORCE cscope 19 19 20 20 runtests: all 21 21 @$(MAKE) -C test runtests ··· 60 60 $(INSTALL) -m 755 -d $(DESTDIR)$(mandir)/man7 61 61 $(INSTALL) -m 644 man/*.7 $(DESTDIR)$(mandir)/man7 62 62 63 + uninstall: 64 + @$(MAKE) -C src uninstall prefix=$(DESTDIR)$(prefix) datadir=$(DESTDIR)$(datadir) 65 + @rm -f $(DESTDIR)$(libdevdir)/pkgconfig/$(NAME).pc 66 + @rm -f $(DESTDIR)$(libdevdir)/pkgconfig/$(NAME)-ffi.pc 67 + @rm -rf $(DESTDIR)$(mandir)/man2/io_uring*.2 68 + @rm -rf $(DESTDIR)$(mandir)/man3/io_uring*.3 69 + @rm -rf $(DESTDIR)$(mandir)/man7/io_uring*.7 70 + 63 71 install-tests: 64 72 @$(MAKE) -C test install prefix=$(DESTDIR)$(prefix) datadir=$(DESTDIR)$(datadir) 73 + 74 + uninstall-tests: 75 + @$(MAKE) -C test uninstall prefix=$(DESTDIR)$(prefix) datadir=$(DESTDIR)$(datadir) 65 76 66 77 clean: 67 78 @rm -f config-host.mak config-host.h cscope.out $(NAME).pc $(NAME)-ffi.pc test/*.dmesg

+1 -1

vendor/liburing/README

··· 54 54 # Prepare build config (optional). 55 55 # 56 56 # --cc specifies the C compiler. 57 - # --cxx speficies the C++ compiler. 57 + # --cxx specifies the C++ compiler. 58 58 # 59 59 ./configure --cc=gcc --cxx=g++; 60 60

+97 -30

vendor/liburing/configure

··· 5 5 cc=${CC:-gcc} 6 6 cxx=${CXX:-g++} 7 7 8 - # 9 - # TODO(ammarfaizi2): Remove this notice and `--nolibc` option. 10 - # 11 - nolibc_deprecated() { 12 - echo ""; 13 - echo "================================================================="; 14 - echo ""; 15 - echo " --nolibc option is deprecated and has no effect."; 16 - echo " It will be removed in a future liburing release."; 17 - echo ""; 18 - echo " liburing on x86-64, x86 (32-bit) and aarch64 always use CONFIG_NOLIBC."; 19 - echo ""; 20 - echo "================================================================="; 21 - echo ""; 22 - } 23 - 24 8 for opt do 25 9 optarg=$(expr "x$opt" : 'x[^=]*=$.*$' || true) 26 10 case "$opt" in 27 11 --help|-h) show_help=yes 28 12 ;; 29 - --prefix=*) prefix="$optarg" 13 + --prefix=*) prefix="$(realpath -s $optarg)" 30 14 ;; 31 15 --includedir=*) includedir="$optarg" 32 16 ;; ··· 42 26 ;; 43 27 --cxx=*) cxx="$optarg" 44 28 ;; 45 - --nolibc) nolibc_deprecated 29 + --use-libc) use_libc=yes 46 30 ;; 47 31 *) 48 32 echo "ERROR: unknown option $opt" ··· 91 75 --datadir=PATH install shared data in PATH [$datadir] 92 76 --cc=CMD use CMD as the C compiler 93 77 --cxx=CMD use CMD as the C++ compiler 94 - --nolibc build liburing without libc 78 + --use-libc use libc for liburing (useful for hardening) 95 79 EOF 96 80 exit 0 97 81 fi ··· 218 202 print_and_output_mak "mandir" "$mandir" 219 203 print_and_output_mak "datadir" "$datadir" 220 204 205 + #################################################### 206 + # Check for correct compiler runtime library to link with 207 + libgcc_link_flag="-lgcc" 208 + if $cc -print-libgcc-file-name >/dev/null 2>&1; then 209 + libgcc_link_flag="$($cc $CFLAGS $LDFLAGS -print-libgcc-file-name)" 210 + fi 211 + print_and_output_mak "libgcc_link_flag" "$libgcc_link_flag" 212 + #################################################### 213 + 221 214 ########################################## 222 215 # check for compiler -Wstringop-overflow 223 216 stringop_overflow="no" 
··· 400 393 fi 401 394 print_config "NVMe uring command support" "$nvme_uring_cmd" 402 395 396 + ########################################## 397 + # Check futexv support 398 + futexv="no" 399 + cat > $TMPC << EOF 400 + #include <linux/futex.h> 401 + #include <unistd.h> 402 + #include <string.h> 403 + int main(void) 404 + { 405 + struct futex_waitv fw; 406 + 407 + memset(&fw, FUTEX_32, sizeof(fw)); 408 + 409 + return sizeof(struct futex_waitv); 410 + } 411 + EOF 412 + if compile_prog "" "" "futexv"; then 413 + futexv="yes" 414 + fi 415 + print_config "futex waitv support" "$futexv" 416 + 417 + ########################################## 418 + # Check idtype_t support 419 + has_idtype_t="no" 420 + cat > $TMPC << EOF 421 + #include <sys/wait.h> 422 + int main(void) 423 + { 424 + idtype_t v; 425 + return 0; 426 + } 427 + EOF 428 + if compile_prog "" "" "idtype_t"; then 429 + has_idtype_t="yes" 430 + fi 431 + print_config "has_idtype_t" "$has_idtype_t" 432 + 403 433 ############################################################################# 404 - # 405 - # Currently, CONFIG_NOLIBC is only enabled on x86-64, x86 (32-bit) and aarch64. 406 - # 407 - cat > $TMPC << EOF 434 + liburing_nolibc="no" 435 + if test "$use_libc" != "yes"; then 436 + 437 + # 438 + # Currently, CONFIG_NOLIBC only supports x86-64, x86 (32-bit), aarch64 and riscv64. 
439 + # 440 + cat > $TMPC << EOF 408 441 int main(void){ 409 - #if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) 442 + #if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64) 410 443 return 0; 411 444 #else 412 445 #error libc is needed 413 446 #endif 414 447 } 415 448 EOF 416 - if compile_prog "" "" "nolibc support"; then 417 - liburing_nolibc="yes" 449 + 450 + if compile_prog "" "" "nolibc"; then 451 + liburing_nolibc="yes" 452 + fi 418 453 fi 419 - print_config "nolibc support" "$liburing_nolibc"; 454 + 455 + print_config "nolibc" "$liburing_nolibc"; 420 456 ############################################################################# 421 457 422 458 #################################################### ··· 471 507 if test "$has_fanotify" = "yes"; then 472 508 output_sym "CONFIG_HAVE_FANOTIFY" 473 509 fi 510 + if test "$futexv" = "yes"; then 511 + output_sym "CONFIG_HAVE_FUTEXV" 512 + fi 474 513 475 514 echo "CC=$cc" >> $config_host_mak 476 515 print_config "CC" "$cc" ··· 478 517 print_config "CXX" "$cxx" 479 518 480 519 # generate io_uring_version.h 481 - MAKE_PRINT_VARS="include Makefile.common\nprint-%: ; @echo \$(\$*)\n" 482 - VERSION_MAJOR=$(env echo -e "$MAKE_PRINT_VARS" | make -s --no-print-directory -f - print-VERSION_MAJOR) 483 - VERSION_MINOR=$(env echo -e "$MAKE_PRINT_VARS" | make -s --no-print-directory -f - print-VERSION_MINOR) 520 + # Reset MAKEFLAGS 521 + MAKEFLAGS= 522 + MAKE_PRINT_VARS="include Makefile.common\nprint-%%: ; @echo \$(\$*)\n" 523 + VERSION_MAJOR=$(printf "$MAKE_PRINT_VARS" | make -s --no-print-directory -f - print-VERSION_MAJOR) 524 + VERSION_MINOR=$(printf "$MAKE_PRINT_VARS" | make -s --no-print-directory -f - print-VERSION_MINOR) 484 525 io_uring_version_h="src/include/liburing/io_uring_version.h" 485 526 cat > $io_uring_version_h << EOF 486 527 /* SPDX-License-Identifier: MIT */ ··· 551 592 552 593 EOF 553 594 fi 595 + if test "$futexv" != "yes"; then 596 
+ cat >> $compat_h << EOF 597 + #include <inttypes.h> 554 598 599 + #define FUTEX_32 2 600 + #define FUTEX_WAITV_MAX 128 601 + 602 + struct futex_waitv { 603 + uint64_t val; 604 + uint64_t uaddr; 605 + uint32_t flags; 606 + uint32_t __reserved; 607 + }; 608 + 609 + EOF 610 + fi 611 + 612 + if test "$has_idtype_t" != "yes"; then 613 + cat >> $compat_h << EOF 614 + typedef enum 615 + { 616 + P_ALL, /* Wait for any child. */ 617 + P_PID, /* Wait for specified process. */ 618 + P_PGID /* Wait for members of process group. */ 619 + } idtype_t; 620 + EOF 621 + fi 555 622 cat >> $compat_h << EOF 556 623 #endif 557 624 EOF

+2

vendor/liburing/debian/liburing-dev.manpages

··· 1 1 usr/share/man/man2/io_uring_*.2 2 2 usr/share/man/man3/io_uring_*.3 3 3 usr/share/man/man7/io_uring.7 4 + usr/share/man/man3/IO_URING_*.3 5 + usr/share/man/man3/__io_uring_*.3

+2 -1

vendor/liburing/debian/rules

··· 5 5 6 6 DEB_BUILD_MAINT_OPTIONS = hardening=+bindnow 7 7 DEB_CFLAGS_MAINT_PREPEND = -Wall 8 + DEB_BUILD_OPTIONS += nocheck 8 9 9 10 include /usr/share/dpkg/default.mk 10 11 include /usr/share/dpkg/buildtools.mk 11 12 12 13 %: 13 - dh $@ 14 + dh $@ --parallel 14 15 15 16 override_dh_auto_configure: 16 17 ./configure \

+11 -4

vendor/liburing/examples/Makefile

··· 19 19 io_uring-test.c \ 20 20 io_uring-udp.c \ 21 21 link-cp.c \ 22 + napi-busy-poll-client.c \ 23 + napi-busy-poll-server.c \ 22 24 poll-bench.c \ 23 25 send-zerocopy.c \ 24 - rsrc-update-bench.c 26 + rsrc-update-bench.c \ 27 + proxy.c 25 28 26 29 all_targets := 27 30 ··· 29 32 ifdef CONFIG_HAVE_UCONTEXT 30 33 example_srcs += ucontext-cp.c 31 34 endif 32 - all_targets += ucontext-cp 35 + all_targets += ucontext-cp helpers.o 33 36 34 37 example_targets := $(patsubst %.c,%,$(patsubst %.cc,%,$(example_srcs))) 35 38 all_targets += $(example_targets) 36 39 40 + helpers = helpers.o 37 41 38 42 all: $(example_targets) 39 43 40 - %: %.c ../src/liburing.a 41 - $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< $(LDFLAGS) 44 + helpers.o: helpers.c 45 + $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $< 46 + 47 + %: %.c $(helpers) ../src/liburing.a 48 + $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< $(helpers) $(LDFLAGS) 42 49 43 50 clean: 44 51 @rm -f $(all_targets)

+62

vendor/liburing/examples/helpers.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + #include <fcntl.h> 3 + #include <stdint.h> 4 + #include <netinet/in.h> 5 + #include <arpa/inet.h> 6 + #include <stdio.h> 7 + #include <stdlib.h> 8 + #include <string.h> 9 + #include <sys/socket.h> 10 + #include <sys/time.h> 11 + #include <unistd.h> 12 + 13 + #include "helpers.h" 14 + 15 + int setup_listening_socket(int port, int ipv6) 16 + { 17 + struct sockaddr_in srv_addr = { }; 18 + struct sockaddr_in6 srv_addr6 = { }; 19 + int fd, enable, ret, domain; 20 + 21 + if (ipv6) 22 + domain = AF_INET6; 23 + else 24 + domain = AF_INET; 25 + 26 + fd = socket(domain, SOCK_STREAM, 0); 27 + if (fd == -1) { 28 + perror("socket()"); 29 + return -1; 30 + } 31 + 32 + enable = 1; 33 + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)); 34 + if (ret < 0) { 35 + perror("setsockopt(SO_REUSEADDR)"); 36 + return -1; 37 + } 38 + 39 + if (ipv6) { 40 + srv_addr6.sin6_family = AF_INET6; 41 + srv_addr6.sin6_port = htons(port); 42 + srv_addr6.sin6_addr = in6addr_any; 43 + ret = bind(fd, (const struct sockaddr *)&srv_addr6, sizeof(srv_addr6)); 44 + } else { 45 + srv_addr.sin_family = AF_INET; 46 + srv_addr.sin_port = htons(port); 47 + srv_addr.sin_addr.s_addr = htonl(INADDR_ANY); 48 + ret = bind(fd, (const struct sockaddr *)&srv_addr, sizeof(srv_addr)); 49 + } 50 + 51 + if (ret < 0) { 52 + perror("bind()"); 53 + return -1; 54 + } 55 + 56 + if (listen(fd, 1024) < 0) { 57 + perror("listen()"); 58 + return -1; 59 + } 60 + 61 + return fd; 62 + }

+7

vendor/liburing/examples/helpers.h

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + #ifndef LIBURING_EX_HELPERS_H 3 + #define LIBURING_EX_HELPERS_H 4 + 5 + int setup_listening_socket(int port, int ipv6); 6 + 7 + #endif

+1 -1

vendor/liburing/examples/io_uring-test.c

··· 69 69 io_uring_prep_readv(sqe, fd, &iovecs[i], 1, offset); 70 70 offset += iovecs[i].iov_len; 71 71 i++; 72 - if (offset > sb.st_size) 72 + if (offset >= sb.st_size) 73 73 break; 74 74 } while (1); 75 75

+509

vendor/liburing/examples/napi-busy-poll-client.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Simple ping/pong client which can use the io_uring NAPI support. 4 + * 5 + * Needs to be run as root because it sets SCHED_FIFO scheduling class, 6 + * but will work without that. 7 + * 8 + * Example: 9 + * 10 + * sudo examples/napi-busy-poll-client -a 192.168.2.2 -n100000 -p4444 \ 11 + * -b -t10 -u 12 + * 13 + * send and receive 100k packets, using NAPI. 14 + */ 15 + #include <ctype.h> 16 + #include <errno.h> 17 + #include <float.h> 18 + #include <getopt.h> 19 + #include <liburing.h> 20 + #include <math.h> 21 + #include <sched.h> 22 + #include <stdio.h> 23 + #include <stdlib.h> 24 + #include <string.h> 25 + #include <sys/types.h> 26 + #include <sys/socket.h> 27 + #include <time.h> 28 + #include <unistd.h> 29 + #include <arpa/inet.h> 30 + #include <netdb.h> 31 + #include <netinet/in.h> 32 + 33 + #define MAXBUFLEN 100 34 + #define PORTNOLEN 10 35 + #define ADDRLEN 80 36 + #define RINGSIZE 1024 37 + 38 + #define printable(ch) (isprint((unsigned char)ch) ? 
ch : '#') 39 + 40 + enum { 41 + IOURING_RECV, 42 + IOURING_SEND, 43 + IOURING_RECVMSG, 44 + IOURING_SENDMSG 45 + }; 46 + 47 + struct ctx 48 + { 49 + struct io_uring ring; 50 + union { 51 + struct sockaddr_in6 saddr6; 52 + struct sockaddr_in saddr; 53 + }; 54 + 55 + int sockfd; 56 + int buffer_len; 57 + int num_pings; 58 + bool napi_check; 59 + 60 + union { 61 + char buffer[MAXBUFLEN]; 62 + struct timespec ts; 63 + }; 64 + 65 + int rtt_index; 66 + double *rtt; 67 + }; 68 + 69 + struct options 70 + { 71 + int num_pings; 72 + __u32 timeout; 73 + 74 + bool sq_poll; 75 + bool defer_tw; 76 + bool busy_loop; 77 + bool prefer_busy_poll; 78 + bool ipv6; 79 + 80 + char port[PORTNOLEN]; 81 + char addr[ADDRLEN]; 82 + }; 83 + 84 + static struct option longopts[] = 85 + { 86 + {"address" , 1, NULL, 'a'}, 87 + {"busy" , 0, NULL, 'b'}, 88 + {"help" , 0, NULL, 'h'}, 89 + {"num_pings", 1, NULL, 'n'}, 90 + {"port" , 1, NULL, 'p'}, 91 + {"prefer" , 1, NULL, 'u'}, 92 + {"sqpoll" , 0, NULL, 's'}, 93 + {"timeout" , 1, NULL, 't'}, 94 + {NULL , 0, NULL, 0 } 95 + }; 96 + 97 + static void printUsage(const char *name) 98 + { 99 + fprintf(stderr, 100 + "Usage: %s [-l|--listen] [-a|--address ip_address] [-p|--port port-no] [-s|--sqpoll]" 101 + " [-b|--busy] [-n|--num pings] [-t|--timeout busy-poll-timeout] [-u||--prefer] [-6] [-h|--help]\n" 102 + "--address\n" 103 + "-a : remote or local ipv6 address\n" 104 + "--busy\n" 105 + "-b : busy poll io_uring instead of blocking.\n" 106 + "--num_pings\n" 107 + "-n : number of pings\n" 108 + "--port\n" 109 + "-p : port\n" 110 + "--sqpoll\n" 111 + "-s : Configure io_uring to use SQPOLL thread\n" 112 + "--timeout\n" 113 + "-t : Configure NAPI busy poll timeout" 114 + "--prefer\n" 115 + "-u : prefer NAPI busy poll\n" 116 + "-6 : use IPV6\n" 117 + "--help\n" 118 + "-h : Display this usage message\n\n", 119 + name); 120 + } 121 + 122 + static void printError(const char *msg, int opt) 123 + { 124 + if (msg && opt) 125 + fprintf(stderr, "%s (-%c)\n", msg, 
printable(opt)); 126 + } 127 + 128 + static void setProcessScheduler(void) 129 + { 130 + struct sched_param param; 131 + 132 + param.sched_priority = sched_get_priority_max(SCHED_FIFO); 133 + if (sched_setscheduler(0, SCHED_FIFO, &param) < 0) 134 + fprintf(stderr, "sched_setscheduler() failed: (%d) %s\n", 135 + errno, strerror(errno)); 136 + } 137 + 138 + static double diffTimespec(const struct timespec *time1, const struct timespec *time0) 139 + { 140 + return (time1->tv_sec - time0->tv_sec) 141 + + (time1->tv_nsec - time0->tv_nsec) / 1000000000.0; 142 + } 143 + 144 + static uint64_t encodeUserData(char type, int fd) 145 + { 146 + return (uint32_t)fd | ((uint64_t)type << 56); 147 + } 148 + 149 + static void decodeUserData(uint64_t data, char *type, int *fd) 150 + { 151 + *type = data >> 56; 152 + *fd = data & 0xffffffffU; 153 + } 154 + 155 + static const char *opTypeToStr(char type) 156 + { 157 + const char *res; 158 + 159 + switch (type) { 160 + case IOURING_RECV: 161 + res = "IOURING_RECV"; 162 + break; 163 + case IOURING_SEND: 164 + res = "IOURING_SEND"; 165 + break; 166 + case IOURING_RECVMSG: 167 + res = "IOURING_RECVMSG"; 168 + break; 169 + case IOURING_SENDMSG: 170 + res = "IOURING_SENDMSG"; 171 + break; 172 + default: 173 + res = "Unknown"; 174 + } 175 + 176 + return res; 177 + } 178 + 179 + static void reportNapi(struct ctx *ctx) 180 + { 181 + unsigned int napi_id = 0; 182 + socklen_t len = sizeof(napi_id); 183 + 184 + getsockopt(ctx->sockfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len); 185 + if (napi_id) 186 + printf(" napi id: %d\n", napi_id); 187 + else 188 + printf(" unassigned napi id\n"); 189 + 190 + ctx->napi_check = true; 191 + } 192 + 193 + static void sendPing(struct ctx *ctx) 194 + { 195 + struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring); 196 + 197 + clock_gettime(CLOCK_REALTIME, (struct timespec *)ctx->buffer); 198 + io_uring_prep_send(sqe, ctx->sockfd, ctx->buffer, sizeof(struct timespec), 0); 199 + sqe->user_data = 
encodeUserData(IOURING_SEND, ctx->sockfd); 200 + } 201 + 202 + static void receivePing(struct ctx *ctx) 203 + { 204 + struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring); 205 + 206 + io_uring_prep_recv(sqe, ctx->sockfd, ctx->buffer, MAXBUFLEN, 0); 207 + sqe->user_data = encodeUserData(IOURING_RECV, ctx->sockfd); 208 + } 209 + 210 + static void recordRTT(struct ctx *ctx) 211 + { 212 + struct timespec startTs = ctx->ts; 213 + 214 + // Send next ping. 215 + sendPing(ctx); 216 + 217 + // Store round-trip time. 218 + ctx->rtt[ctx->rtt_index] = diffTimespec(&ctx->ts, &startTs); 219 + ctx->rtt_index++; 220 + } 221 + 222 + static void printStats(struct ctx *ctx) 223 + { 224 + double minRTT = DBL_MAX; 225 + double maxRTT = 0.0; 226 + double avgRTT = 0.0; 227 + double stddevRTT = 0.0; 228 + 229 + // Calculate min, max, avg. 230 + for (int i = 0; i < ctx->rtt_index; i++) { 231 + if (ctx->rtt[i] < minRTT) 232 + minRTT = ctx->rtt[i]; 233 + if (ctx->rtt[i] > maxRTT) 234 + maxRTT = ctx->rtt[i]; 235 + 236 + avgRTT += ctx->rtt[i]; 237 + } 238 + avgRTT /= ctx->rtt_index; 239 + 240 + // Calculate stddev. 
241 + for (int i = 0; i < ctx->rtt_index; i++) 242 + stddevRTT += fabs(ctx->rtt[i] - avgRTT); 243 + stddevRTT /= ctx->rtt_index; 244 + 245 + fprintf(stdout, " rtt(us) min/avg/max/mdev = %.3f/%.3f/%.3f/%.3f\n", 246 + minRTT * 1000000, avgRTT * 1000000, maxRTT * 1000000, stddevRTT * 1000000); 247 + } 248 + 249 + static int completion(struct ctx *ctx, struct io_uring_cqe *cqe) 250 + { 251 + char type; 252 + int fd; 253 + int res = cqe->res; 254 + 255 + decodeUserData(cqe->user_data, &type, &fd); 256 + if (res < 0) { 257 + fprintf(stderr, "unexpected %s failure: (%d) %s\n", 258 + opTypeToStr(type), -res, strerror(-res)); 259 + return -1; 260 + } 261 + 262 + switch (type) { 263 + case IOURING_SEND: 264 + receivePing(ctx); 265 + break; 266 + case IOURING_RECV: 267 + if (res != sizeof(struct timespec)) { 268 + fprintf(stderr, "unexpected ping reply len: %d\n", res); 269 + abort(); 270 + } 271 + 272 + if (!ctx->napi_check) { 273 + reportNapi(ctx); 274 + sendPing(ctx); 275 + } else { 276 + recordRTT(ctx); 277 + } 278 + 279 + --ctx->num_pings; 280 + break; 281 + 282 + default: 283 + fprintf(stderr, "unexpected %s completion\n", 284 + opTypeToStr(type)); 285 + return -1; 286 + break; 287 + } 288 + 289 + return 0; 290 + } 291 + 292 + int main(int argc, char *argv[]) 293 + { 294 + struct ctx ctx; 295 + struct options opt; 296 + struct __kernel_timespec *tsPtr; 297 + struct __kernel_timespec ts; 298 + struct io_uring_params params; 299 + struct io_uring_napi napi; 300 + int flag, ret, af; 301 + 302 + memset(&opt, 0, sizeof(struct options)); 303 + 304 + // Process flags. 
305 + while ((flag = getopt_long(argc, argv, ":hs:bua:n:p:t:6d:", longopts, NULL)) != -1) { 306 + switch (flag) { 307 + case 'a': 308 + strcpy(opt.addr, optarg); 309 + break; 310 + case 'b': 311 + opt.busy_loop = true; 312 + break; 313 + case 'h': 314 + printUsage(argv[0]); 315 + exit(0); 316 + break; 317 + case 'n': 318 + opt.num_pings = atoi(optarg) + 1; 319 + break; 320 + case 'p': 321 + strcpy(opt.port, optarg); 322 + break; 323 + case 's': 324 + opt.sq_poll = !!atoi(optarg); 325 + break; 326 + case 't': 327 + opt.timeout = atoi(optarg); 328 + break; 329 + case 'u': 330 + opt.prefer_busy_poll = true; 331 + break; 332 + case '6': 333 + opt.ipv6 = true; 334 + break; 335 + case 'd': 336 + opt.defer_tw = !!atoi(optarg); 337 + break; 338 + case ':': 339 + printError("Missing argument", optopt); 340 + printUsage(argv[0]); 341 + exit(-1); 342 + break; 343 + case '?': 344 + printError("Unrecognized option", optopt); 345 + printUsage(argv[0]); 346 + exit(-1); 347 + break; 348 + 349 + default: 350 + fprintf(stderr, "Fatal: Unexpected case in CmdLineProcessor switch()\n"); 351 + exit(-1); 352 + break; 353 + } 354 + } 355 + 356 + if (strlen(opt.addr) == 0) { 357 + fprintf(stderr, "address option is mandatory\n"); 358 + printUsage(argv[0]); 359 + exit(1); 360 + } 361 + 362 + if (opt.ipv6) { 363 + af = AF_INET6; 364 + ctx.saddr6.sin6_port = htons(atoi(opt.port)); 365 + ctx.saddr6.sin6_family = AF_INET6; 366 + } else { 367 + af = AF_INET; 368 + ctx.saddr.sin_port = htons(atoi(opt.port)); 369 + ctx.saddr.sin_family = AF_INET; 370 + } 371 + 372 + if (opt.ipv6) 373 + ret = inet_pton(af, opt.addr, &ctx.saddr6.sin6_addr); 374 + else 375 + ret = inet_pton(af, opt.addr, &ctx.saddr.sin_addr); 376 + if (ret <= 0) { 377 + fprintf(stderr, "inet_pton error for %s\n", optarg); 378 + printUsage(argv[0]); 379 + exit(1); 380 + } 381 + 382 + // Connect to server. 383 + fprintf(stdout, "Connecting to %s... 
(port=%s) to send %d pings\n", opt.addr, opt.port, opt.num_pings - 1); 384 + 385 + if ((ctx.sockfd = socket(af, SOCK_DGRAM, 0)) < 0) { 386 + fprintf(stderr, "socket() failed: (%d) %s\n", errno, strerror(errno)); 387 + exit(1); 388 + } 389 + 390 + if (opt.ipv6) 391 + ret = connect(ctx.sockfd, (struct sockaddr *)&ctx.saddr6, sizeof(struct sockaddr_in6)); 392 + else 393 + ret = connect(ctx.sockfd, (struct sockaddr *)&ctx.saddr, sizeof(struct sockaddr_in)); 394 + if (ret < 0) { 395 + fprintf(stderr, "connect() failed: (%d) %s\n", errno, strerror(errno)); 396 + exit(1); 397 + } 398 + 399 + // Setup ring. 400 + memset(&params, 0, sizeof(params)); 401 + memset(&ts, 0, sizeof(ts)); 402 + memset(&napi, 0, sizeof(napi)); 403 + 404 + params.flags = IORING_SETUP_SINGLE_ISSUER; 405 + if (opt.defer_tw) { 406 + params.flags |= IORING_SETUP_DEFER_TASKRUN; 407 + } else if (opt.sq_poll) { 408 + params.flags = IORING_SETUP_SQPOLL; 409 + params.sq_thread_idle = 50; 410 + } else { 411 + params.flags |= IORING_SETUP_COOP_TASKRUN; 412 + } 413 + 414 + ret = io_uring_queue_init_params(RINGSIZE, &ctx.ring, &params); 415 + if (ret) { 416 + fprintf(stderr, "io_uring_queue_init_params() failed: (%d) %s\n", 417 + ret, strerror(-ret)); 418 + exit(1); 419 + } 420 + 421 + if (opt.timeout || opt.prefer_busy_poll) { 422 + napi.prefer_busy_poll = opt.prefer_busy_poll; 423 + napi.busy_poll_to = opt.timeout; 424 + 425 + ret = io_uring_register_napi(&ctx.ring, &napi); 426 + if (ret) { 427 + fprintf(stderr, "io_uring_register_napi: %d\n", ret); 428 + exit(1); 429 + } 430 + } 431 + 432 + if (opt.busy_loop) 433 + tsPtr = &ts; 434 + else 435 + tsPtr = NULL; 436 + 437 + // Use realtime scheduler. 438 + setProcessScheduler(); 439 + 440 + // Copy payload. 441 + clock_gettime(CLOCK_REALTIME, &ctx.ts); 442 + 443 + // Setup context. 
444 + ctx.napi_check = false; 445 + ctx.buffer_len = sizeof(struct timespec); 446 + ctx.num_pings = opt.num_pings; 447 + 448 + ctx.rtt_index = 0; 449 + ctx.rtt = (double *)malloc(sizeof(double) * opt.num_pings); 450 + if (!ctx.rtt) { 451 + fprintf(stderr, "Cannot allocate results array\n"); 452 + exit(1); 453 + } 454 + 455 + // Send initial message to get napi id. 456 + sendPing(&ctx); 457 + 458 + while (ctx.num_pings != 0) { 459 + int res; 460 + unsigned num_completed = 0; 461 + unsigned head; 462 + struct io_uring_cqe *cqe; 463 + 464 + do { 465 + res = io_uring_submit_and_wait_timeout(&ctx.ring, &cqe, 1, tsPtr, NULL); 466 + if (res >= 0) 467 + break; 468 + else if (res == -ETIME) 469 + continue; 470 + fprintf(stderr, "submit_and_wait: %d\n", res); 471 + exit(1); 472 + } while (1); 473 + 474 + io_uring_for_each_cqe(&ctx.ring, head, cqe) { 475 + ++num_completed; 476 + if (completion(&ctx, cqe)) 477 + goto out; 478 + } 479 + 480 + if (num_completed) 481 + io_uring_cq_advance(&ctx.ring, num_completed); 482 + } 483 + 484 + printStats(&ctx); 485 + 486 + out: 487 + // Clean up. 488 + if (opt.timeout || opt.prefer_busy_poll) { 489 + ret = io_uring_unregister_napi(&ctx.ring, &napi); 490 + if (ret) 491 + fprintf(stderr, "io_uring_unregister_napi: %d\n", ret); 492 + if (opt.timeout != napi.busy_poll_to || 493 + opt.prefer_busy_poll != napi.prefer_busy_poll) { 494 + fprintf(stderr, "Expected busy poll to = %d, got %d\n", 495 + opt.timeout, napi.busy_poll_to); 496 + fprintf(stderr, "Expected prefer busy poll = %d, got %d\n", 497 + opt.prefer_busy_poll, napi.prefer_busy_poll); 498 + } 499 + } else { 500 + ret = io_uring_unregister_napi(&ctx.ring, NULL); 501 + if (ret) 502 + fprintf(stderr, "io_uring_unregister_napi: %d\n", ret); 503 + } 504 + 505 + io_uring_queue_exit(&ctx.ring); 506 + free(ctx.rtt); 507 + close(ctx.sockfd); 508 + return 0; 509 + }

+450

vendor/liburing/examples/napi-busy-poll-server.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Simple ping/pong backend which can use the io_uring NAPI support. 4 + * 5 + * Needs to be run as root because it sets SCHED_FIFO scheduling class, 6 + * but will work without that. 7 + * 8 + * Example: 9 + * 10 + * sudo examples/napi-busy-poll-server -l -a 192.168.2.2 -n100000 \ 11 + * -p4444 -t10 -b -u 12 + * 13 + * will respond to 100k packages, using NAPI. 14 + */ 15 + #include <ctype.h> 16 + #include <errno.h> 17 + #include <getopt.h> 18 + #include <liburing.h> 19 + #include <math.h> 20 + #include <sched.h> 21 + #include <stdio.h> 22 + #include <stdlib.h> 23 + #include <string.h> 24 + #include <sys/types.h> 25 + #include <sys/socket.h> 26 + #include <time.h> 27 + #include <unistd.h> 28 + #include <arpa/inet.h> 29 + #include <netdb.h> 30 + #include <netinet/in.h> 31 + 32 + #define MAXBUFLEN 100 33 + #define PORTNOLEN 10 34 + #define ADDRLEN 80 35 + #define RINGSIZE 1024 36 + 37 + #define printable(ch) (isprint((unsigned char)ch) ? 
ch : '#') 38 + 39 + enum { 40 + IOURING_RECV, 41 + IOURING_SEND, 42 + IOURING_RECVMSG, 43 + IOURING_SENDMSG 44 + }; 45 + 46 + struct ctx 47 + { 48 + struct io_uring ring; 49 + union { 50 + struct sockaddr_in6 saddr6; 51 + struct sockaddr_in saddr; 52 + }; 53 + struct iovec iov; 54 + struct msghdr msg; 55 + 56 + int sockfd; 57 + int buffer_len; 58 + int num_pings; 59 + bool napi_check; 60 + 61 + union { 62 + char buffer[MAXBUFLEN]; 63 + struct timespec ts; 64 + }; 65 + }; 66 + 67 + struct options 68 + { 69 + int num_pings; 70 + __u32 timeout; 71 + 72 + bool listen; 73 + bool defer_tw; 74 + bool sq_poll; 75 + bool busy_loop; 76 + bool prefer_busy_poll; 77 + bool ipv6; 78 + 79 + char port[PORTNOLEN]; 80 + char addr[ADDRLEN]; 81 + }; 82 + 83 + static struct options opt; 84 + 85 + static struct option longopts[] = 86 + { 87 + {"address" , 1, NULL, 'a'}, 88 + {"busy" , 0, NULL, 'b'}, 89 + {"help" , 0, NULL, 'h'}, 90 + {"listen" , 0, NULL, 'l'}, 91 + {"num_pings", 1, NULL, 'n'}, 92 + {"port" , 1, NULL, 'p'}, 93 + {"prefer" , 1, NULL, 'u'}, 94 + {"sqpoll" , 0, NULL, 's'}, 95 + {"timeout" , 1, NULL, 't'}, 96 + {NULL , 0, NULL, 0 } 97 + }; 98 + 99 + static void printUsage(const char *name) 100 + { 101 + fprintf(stderr, 102 + "Usage: %s [-l|--listen] [-a|--address ip_address] [-p|--port port-no] [-s|--sqpoll]" 103 + " [-b|--busy] [-n|--num pings] [-t|--timeout busy-poll-timeout] [-u|--prefer] [-6] [-h|--help]\n" 104 + " --listen\n" 105 + "-l : Server mode\n" 106 + "--address\n" 107 + "-a : remote or local ipv6 address\n" 108 + "--busy\n" 109 + "-b : busy poll io_uring instead of blocking.\n" 110 + "--num_pings\n" 111 + "-n : number of pings\n" 112 + "--port\n" 113 + "-p : port\n" 114 + "--sqpoll\n" 115 + "-s : Configure io_uring to use SQPOLL thread\n" 116 + "--timeout\n" 117 + "-t : Configure NAPI busy poll timeout" 118 + "--prefer\n" 119 + "-u : prefer NAPI busy poll\n" 120 + "-6 : use IPV6\n" 121 + "--help\n" 122 + "-h : Display this usage message\n\n", 123 + name); 124 + 
} 125 + 126 + static void printError(const char *msg, int opt) 127 + { 128 + if (msg && opt) 129 + fprintf(stderr, "%s (-%c)\n", msg, printable(opt)); 130 + } 131 + 132 + static void setProcessScheduler(void) 133 + { 134 + struct sched_param param; 135 + 136 + param.sched_priority = sched_get_priority_max(SCHED_FIFO); 137 + if (sched_setscheduler(0, SCHED_FIFO, &param) < 0) 138 + fprintf(stderr, "sched_setscheduler() failed: (%d) %s\n", 139 + errno, strerror(errno)); 140 + } 141 + 142 + static uint64_t encodeUserData(char type, int fd) 143 + { 144 + return (uint32_t)fd | ((__u64)type << 56); 145 + } 146 + 147 + static void decodeUserData(uint64_t data, char *type, int *fd) 148 + { 149 + *type = data >> 56; 150 + *fd = data & 0xffffffffU; 151 + } 152 + 153 + static const char *opTypeToStr(char type) 154 + { 155 + const char *res; 156 + 157 + switch (type) { 158 + case IOURING_RECV: 159 + res = "IOURING_RECV"; 160 + break; 161 + case IOURING_SEND: 162 + res = "IOURING_SEND"; 163 + break; 164 + case IOURING_RECVMSG: 165 + res = "IOURING_RECVMSG"; 166 + break; 167 + case IOURING_SENDMSG: 168 + res = "IOURING_SENDMSG"; 169 + break; 170 + default: 171 + res = "Unknown"; 172 + } 173 + 174 + return res; 175 + } 176 + 177 + static void reportNapi(struct ctx *ctx) 178 + { 179 + unsigned int napi_id = 0; 180 + socklen_t len = sizeof(napi_id); 181 + 182 + getsockopt(ctx->sockfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len); 183 + if (napi_id) 184 + printf(" napi id: %d\n", napi_id); 185 + else 186 + printf(" unassigned napi id\n"); 187 + 188 + ctx->napi_check = true; 189 + } 190 + 191 + static void sendPing(struct ctx *ctx) 192 + { 193 + struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring); 194 + 195 + io_uring_prep_sendmsg(sqe, ctx->sockfd, &ctx->msg, 0); 196 + sqe->user_data = encodeUserData(IOURING_SENDMSG, ctx->sockfd); 197 + } 198 + 199 + static void receivePing(struct ctx *ctx) 200 + { 201 + struct io_uring_sqe *sqe; 202 + 203 + bzero(&ctx->msg, sizeof(struct 
msghdr)); 204 + if (opt.ipv6) { 205 + ctx->msg.msg_name = &ctx->saddr6; 206 + ctx->msg.msg_namelen = sizeof(struct sockaddr_in6); 207 + } else { 208 + ctx->msg.msg_name = &ctx->saddr; 209 + ctx->msg.msg_namelen = sizeof(struct sockaddr_in); 210 + } 211 + ctx->iov.iov_base = ctx->buffer; 212 + ctx->iov.iov_len = MAXBUFLEN; 213 + ctx->msg.msg_iov = &ctx->iov; 214 + ctx->msg.msg_iovlen = 1; 215 + 216 + sqe = io_uring_get_sqe(&ctx->ring); 217 + io_uring_prep_recvmsg(sqe, ctx->sockfd, &ctx->msg, 0); 218 + sqe->user_data = encodeUserData(IOURING_RECVMSG, ctx->sockfd); 219 + } 220 + 221 + static void completion(struct ctx *ctx, struct io_uring_cqe *cqe) 222 + { 223 + char type; 224 + int fd; 225 + int res = cqe->res; 226 + 227 + decodeUserData(cqe->user_data, &type, &fd); 228 + if (res < 0) { 229 + fprintf(stderr, "unexpected %s failure: (%d) %s\n", 230 + opTypeToStr(type), -res, strerror(-res)); 231 + abort(); 232 + } 233 + 234 + switch (type) { 235 + case IOURING_SENDMSG: 236 + receivePing(ctx); 237 + --ctx->num_pings; 238 + break; 239 + case IOURING_RECVMSG: 240 + ctx->iov.iov_len = res; 241 + sendPing(ctx); 242 + if (!ctx->napi_check) 243 + reportNapi(ctx); 244 + break; 245 + default: 246 + fprintf(stderr, "unexpected %s completion\n", 247 + opTypeToStr(type)); 248 + abort(); 249 + break; 250 + } 251 + } 252 + 253 + int main(int argc, char *argv[]) 254 + { 255 + int flag; 256 + struct ctx ctx; 257 + struct __kernel_timespec *tsPtr; 258 + struct __kernel_timespec ts; 259 + struct io_uring_params params; 260 + struct io_uring_napi napi; 261 + int ret, af; 262 + 263 + memset(&opt, 0, sizeof(struct options)); 264 + 265 + // Process flags. 
266 + while ((flag = getopt_long(argc, argv, ":lhs:bua:n:p:t:6d:", longopts, NULL)) != -1) { 267 + switch (flag) { 268 + case 'a': 269 + strcpy(opt.addr, optarg); 270 + break; 271 + case 'b': 272 + opt.busy_loop = true; 273 + break; 274 + case 'h': 275 + printUsage(argv[0]); 276 + exit(0); 277 + break; 278 + case 'l': 279 + opt.listen = true; 280 + break; 281 + case 'n': 282 + opt.num_pings = atoi(optarg) + 1; 283 + break; 284 + case 'p': 285 + strcpy(opt.port, optarg); 286 + break; 287 + case 's': 288 + opt.sq_poll = !!atoi(optarg); 289 + break; 290 + case 't': 291 + opt.timeout = atoi(optarg); 292 + break; 293 + case 'u': 294 + opt.prefer_busy_poll = true; 295 + break; 296 + case '6': 297 + opt.ipv6 = true; 298 + break; 299 + case 'd': 300 + opt.defer_tw = !!atoi(optarg); 301 + break; 302 + case ':': 303 + printError("Missing argument", optopt); 304 + printUsage(argv[0]); 305 + exit(-1); 306 + break; 307 + case '?': 308 + printError("Unrecognized option", optopt); 309 + printUsage(argv[0]); 310 + exit(-1); 311 + break; 312 + 313 + default: 314 + fprintf(stderr, "Fatal: Unexpected case in CmdLineProcessor switch()\n"); 315 + exit(-1); 316 + break; 317 + } 318 + } 319 + 320 + if (strlen(opt.addr) == 0) { 321 + fprintf(stderr, "address option is mandatory\n"); 322 + printUsage(argv[0]); 323 + exit(1); 324 + } 325 + 326 + if (opt.ipv6) { 327 + af = AF_INET6; 328 + ctx.saddr6.sin6_port = htons(atoi(opt.port)); 329 + ctx.saddr6.sin6_family = AF_INET6; 330 + } else { 331 + af = AF_INET; 332 + ctx.saddr.sin_port = htons(atoi(opt.port)); 333 + ctx.saddr.sin_family = AF_INET; 334 + } 335 + 336 + if (opt.ipv6) 337 + ret = inet_pton(AF_INET6, opt.addr, &ctx.saddr6.sin6_addr); 338 + else 339 + ret = inet_pton(AF_INET, opt.addr, &ctx.saddr.sin_addr); 340 + if (ret <= 0) { 341 + fprintf(stderr, "inet_pton error for %s\n", optarg); 342 + printUsage(argv[0]); 343 + exit(1); 344 + } 345 + 346 + // Connect to server. 
347 + fprintf(stdout, "Listening %s : %s...\n", opt.addr, opt.port); 348 + 349 + if ((ctx.sockfd = socket(af, SOCK_DGRAM, 0)) < 0) { 350 + fprintf(stderr, "socket() failed: (%d) %s\n", errno, strerror(errno)); 351 + exit(1); 352 + } 353 + 354 + if (opt.ipv6) 355 + ret = bind(ctx.sockfd, (struct sockaddr *)&ctx.saddr6, sizeof(struct sockaddr_in6)); 356 + else 357 + ret = bind(ctx.sockfd, (struct sockaddr *)&ctx.saddr, sizeof(struct sockaddr_in)); 358 + if (ret < 0) { 359 + fprintf(stderr, "bind() failed: (%d) %s\n", errno, strerror(errno)); 360 + exit(1); 361 + } 362 + 363 + // Setup ring. 364 + memset(&params, 0, sizeof(params)); 365 + memset(&ts, 0, sizeof(ts)); 366 + memset(&napi, 0, sizeof(napi)); 367 + 368 + params.flags = IORING_SETUP_SINGLE_ISSUER; 369 + if (opt.defer_tw) { 370 + params.flags |= IORING_SETUP_DEFER_TASKRUN; 371 + } else if (opt.sq_poll) { 372 + params.flags = IORING_SETUP_SQPOLL; 373 + params.sq_thread_idle = 50; 374 + } else { 375 + params.flags |= IORING_SETUP_COOP_TASKRUN; 376 + } 377 + 378 + ret = io_uring_queue_init_params(RINGSIZE, &ctx.ring, &params); 379 + if (ret) { 380 + fprintf(stderr, "io_uring_queue_init_params() failed: (%d) %s\n", 381 + ret, strerror(-ret)); 382 + exit(1); 383 + } 384 + 385 + if (opt.timeout || opt.prefer_busy_poll) { 386 + napi.prefer_busy_poll = opt.prefer_busy_poll; 387 + napi.busy_poll_to = opt.timeout; 388 + 389 + ret = io_uring_register_napi(&ctx.ring, &napi); 390 + if (ret) { 391 + fprintf(stderr, "io_uring_register_napi: %d\n", ret); 392 + exit(1); 393 + } 394 + } 395 + 396 + if (opt.busy_loop) 397 + tsPtr = &ts; 398 + else 399 + tsPtr = NULL; 400 + 401 + // Use realtime scheduler. 402 + setProcessScheduler(); 403 + 404 + // Copy payload. 405 + clock_gettime(CLOCK_REALTIME, &ctx.ts); 406 + 407 + // Setup context. 408 + ctx.napi_check = false; 409 + ctx.buffer_len = sizeof(struct timespec); 410 + ctx.num_pings = opt.num_pings; 411 + 412 + // Receive initial message to get napi id. 
413 + receivePing(&ctx); 414 + 415 + while (ctx.num_pings != 0) { 416 + int res; 417 + unsigned int num_completed = 0; 418 + unsigned int head; 419 + struct io_uring_cqe *cqe; 420 + 421 + do { 422 + res = io_uring_submit_and_wait_timeout(&ctx.ring, &cqe, 1, tsPtr, NULL); 423 + if (res >= 0) 424 + break; 425 + else if (res == -ETIME) 426 + continue; 427 + fprintf(stderr, "submit_and_wait: %d\n", res); 428 + exit(1); 429 + } while (1); 430 + 431 + io_uring_for_each_cqe(&ctx.ring, head, cqe) { 432 + ++num_completed; 433 + completion(&ctx, cqe); 434 + } 435 + 436 + if (num_completed) 437 + io_uring_cq_advance(&ctx.ring, num_completed); 438 + } 439 + 440 + // Clean up. 441 + if (opt.timeout || opt.prefer_busy_poll) { 442 + ret = io_uring_unregister_napi(&ctx.ring, &napi); 443 + if (ret) 444 + fprintf(stderr, "io_uring_unregister_napi: %d\n", ret); 445 + } 446 + 447 + io_uring_queue_exit(&ctx.ring); 448 + close(ctx.sockfd); 449 + return 0; 450 + }

+2461

vendor/liburing/examples/proxy.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Sample program that can act either as a packet sink, where it just receives 4 + * packets and doesn't do anything with them, or it can act as a proxy where it 5 + * receives packets and then sends them to a new destination. The proxy can 6 + * be unidirectional (-B0), or bi-direction (-B1). 7 + * 8 + * Examples: 9 + * 10 + * Act as a proxy, listening on port 4444, and send data to 192.168.2.6 on port 11 + * 4445. Use multishot receive, DEFER_TASKRUN, and fixed files 12 + * 13 + * ./proxy -m1 -r4444 -H 192.168.2.6 -p4445 14 + * 15 + * Same as above, but utilize send bundles (-C1, requires -u1 send_ring) as well 16 + * with ring provided send buffers, and recv bundles (-c1). 17 + * 18 + * ./proxy -m1 -c1 -u1 -C1 -r4444 -H 192.168.2.6 -p4445 19 + * 20 + * Act as a bi-directional proxy, listening on port 8888, and send data back 21 + * and forth between host and 192.168.2.6 on port 22. Use multishot receive, 22 + * DEFER_TASKRUN, fixed files, and buffers of size 1500. 23 + * 24 + * ./proxy -m1 -B1 -b1500 -r8888 -H 192.168.2.6 -p22 25 + * 26 + * Act a sink, listening on port 4445, using multishot receive, DEFER_TASKRUN, 27 + * and fixed files: 28 + * 29 + * ./proxy -m1 -s1 -r4445 30 + * 31 + * Run with -h to see a list of options, and their defaults. 
32 + * 33 + * (C) 2024 Jens Axboe <axboe@kernel.dk> 34 + * 35 + */ 36 + #include <fcntl.h> 37 + #include <stdint.h> 38 + #include <netinet/in.h> 39 + #include <netinet/tcp.h> 40 + #include <arpa/inet.h> 41 + #include <stdio.h> 42 + #include <stdlib.h> 43 + #include <string.h> 44 + #include <sys/socket.h> 45 + #include <sys/time.h> 46 + #include <unistd.h> 47 + #include <sys/mman.h> 48 + #include <linux/mman.h> 49 + #include <locale.h> 50 + #include <assert.h> 51 + #include <pthread.h> 52 + #include <liburing.h> 53 + 54 + #include "proxy.h" 55 + #include "helpers.h" 56 + 57 + /* 58 + * Will go away once/if bundles are upstreamed and we put the generic 59 + * definitions in the kernel header. 60 + */ 61 + #ifndef IORING_RECVSEND_BUNDLE 62 + #define IORING_RECVSEND_BUNDLE (1U << 4) 63 + #endif 64 + #ifndef IORING_FEAT_SEND_BUF_SELECT 65 + #define IORING_FEAT_SEND_BUF_SELECT (1U << 14) 66 + #endif 67 + 68 + static int cur_bgid = 1; 69 + static int nr_conns; 70 + static int open_conns; 71 + static long page_size; 72 + 73 + static unsigned long event_loops; 74 + static unsigned long events; 75 + 76 + static int recv_mshot = 1; 77 + static int sqpoll; 78 + static int defer_tw = 1; 79 + static int is_sink; 80 + static int fixed_files = 1; 81 + static char *host = "192.168.3.2"; 82 + static int send_port = 4445; 83 + static int receive_port = 4444; 84 + static int buf_size = 32; 85 + static int bidi; 86 + static int ipv6; 87 + static int napi; 88 + static int napi_timeout; 89 + static int wait_batch = 1; 90 + static int wait_usec = 1000000; 91 + static int rcv_msg; 92 + static int snd_msg; 93 + static int snd_zc; 94 + static int send_ring = -1; 95 + static int snd_bundle; 96 + static int rcv_bundle; 97 + static int use_huge; 98 + static int ext_stat; 99 + static int verbose; 100 + 101 + static int nr_bufs = 256; 102 + static int br_mask; 103 + 104 + static int ring_size = 128; 105 + 106 + static pthread_mutex_t thread_lock; 107 + static struct timeval last_housekeeping; 108 
+ 109 + /* 110 + * For sendmsg/recvmsg. recvmsg just has a single vec, sendmsg will have 111 + * two vecs - one that is currently submitted and being sent, and one that 112 + * is being prepared. When a new sendmsg is issued, we'll swap which one we 113 + * use. For send, even though we don't pass in the iovec itself, we use the 114 + * vec to serialize the sends to avoid reordering. 115 + */ 116 + struct msg_vec { 117 + struct iovec *iov; 118 + /* length of allocated vec */ 119 + int vec_size; 120 + /* length currently being used */ 121 + int iov_len; 122 + /* only for send, current index we're processing */ 123 + int cur_iov; 124 + }; 125 + 126 + struct io_msg { 127 + struct msghdr msg; 128 + struct msg_vec vecs[2]; 129 + /* current msg_vec being prepared */ 130 + int vec_index; 131 + }; 132 + 133 + /* 134 + * Per socket stats per connection. For bi-directional, we'll have both 135 + * sends and receives on each socket, this helps track them seperately. 136 + * For sink or one directional, each of the two stats will be only sends 137 + * or receives, not both. 
138 + */ 139 + struct conn_dir { 140 + int index; 141 + 142 + int pending_shutdown; 143 + int pending_send; 144 + int pending_recv; 145 + 146 + int snd_notif; 147 + 148 + int out_buffers; 149 + 150 + int rcv, rcv_shrt, rcv_enobufs, rcv_mshot; 151 + int snd, snd_shrt, snd_enobufs, snd_busy, snd_mshot; 152 + 153 + int snd_next_bid; 154 + int rcv_next_bid; 155 + 156 + int *rcv_bucket; 157 + int *snd_bucket; 158 + 159 + unsigned long in_bytes, out_bytes; 160 + 161 + /* only ever have a single recv pending */ 162 + struct io_msg io_rcv_msg; 163 + 164 + /* one send that is inflight, and one being prepared for the next one */ 165 + struct io_msg io_snd_msg; 166 + }; 167 + 168 + enum { 169 + CONN_F_STARTED = 1, 170 + CONN_F_DISCONNECTING = 2, 171 + CONN_F_DISCONNECTED = 4, 172 + CONN_F_PENDING_SHUTDOWN = 8, 173 + CONN_F_STATS_SHOWN = 16, 174 + CONN_F_END_TIME = 32, 175 + CONN_F_REAPED = 64, 176 + }; 177 + 178 + /* 179 + * buffer ring belonging to a connection 180 + */ 181 + struct conn_buf_ring { 182 + struct io_uring_buf_ring *br; 183 + void *buf; 184 + int bgid; 185 + }; 186 + 187 + struct conn { 188 + struct io_uring ring; 189 + 190 + /* receive side buffer ring, new data arrives here */ 191 + struct conn_buf_ring in_br; 192 + /* if send_ring is used, outgoing data to send */ 193 + struct conn_buf_ring out_br; 194 + 195 + int tid; 196 + int in_fd, out_fd; 197 + int pending_cancels; 198 + int flags; 199 + 200 + struct conn_dir cd[2]; 201 + 202 + struct timeval start_time, end_time; 203 + 204 + union { 205 + struct sockaddr_in addr; 206 + struct sockaddr_in6 addr6; 207 + }; 208 + 209 + pthread_t thread; 210 + pthread_barrier_t startup_barrier; 211 + }; 212 + 213 + #define MAX_CONNS 1024 214 + static struct conn conns[MAX_CONNS]; 215 + 216 + #define vlog(str, ...) 
do { \ 217 + if (verbose) \ 218 + printf(str, ##__VA_ARGS__); \ 219 + } while (0) 220 + 221 + static int prep_next_send(struct io_uring *ring, struct conn *c, 222 + struct conn_dir *cd, int fd); 223 + static void *thread_main(void *data); 224 + 225 + static struct conn *cqe_to_conn(struct io_uring_cqe *cqe) 226 + { 227 + struct userdata ud = { .val = cqe->user_data }; 228 + 229 + return &conns[ud.op_tid & TID_MASK]; 230 + } 231 + 232 + static struct conn_dir *cqe_to_conn_dir(struct conn *c, 233 + struct io_uring_cqe *cqe) 234 + { 235 + int fd = cqe_to_fd(cqe); 236 + 237 + return &c->cd[fd != c->in_fd]; 238 + } 239 + 240 + static int other_dir_fd(struct conn *c, int fd) 241 + { 242 + if (c->in_fd == fd) 243 + return c->out_fd; 244 + return c->in_fd; 245 + } 246 + 247 + /* currently active msg_vec */ 248 + static struct msg_vec *msg_vec(struct io_msg *imsg) 249 + { 250 + return &imsg->vecs[imsg->vec_index]; 251 + } 252 + 253 + static struct msg_vec *snd_msg_vec(struct conn_dir *cd) 254 + { 255 + return msg_vec(&cd->io_snd_msg); 256 + } 257 + 258 + /* 259 + * Goes from accept new connection -> create socket, connect to end 260 + * point, prepare recv, on receive do send (unless sink). If either ends 261 + * disconnects, we transition to shutdown and then close. 
262 + */ 263 + enum { 264 + __ACCEPT = 1, 265 + __SOCK = 2, 266 + __CONNECT = 3, 267 + __RECV = 4, 268 + __RECVMSG = 5, 269 + __SEND = 6, 270 + __SENDMSG = 7, 271 + __SHUTDOWN = 8, 272 + __CANCEL = 9, 273 + __CLOSE = 10, 274 + __FD_PASS = 11, 275 + __NOP = 12, 276 + __STOP = 13, 277 + }; 278 + 279 + struct error_handler { 280 + const char *name; 281 + int (*error_fn)(struct error_handler *, struct io_uring *, struct io_uring_cqe *); 282 + }; 283 + 284 + static int recv_error(struct error_handler *err, struct io_uring *ring, 285 + struct io_uring_cqe *cqe); 286 + static int send_error(struct error_handler *err, struct io_uring *ring, 287 + struct io_uring_cqe *cqe); 288 + 289 + static int default_error(struct error_handler *err, 290 + struct io_uring __attribute__((__unused__)) *ring, 291 + struct io_uring_cqe *cqe) 292 + { 293 + struct conn *c = cqe_to_conn(cqe); 294 + 295 + fprintf(stderr, "%d: %s error %s\n", c->tid, err->name, strerror(-cqe->res)); 296 + fprintf(stderr, "fd=%d, bid=%d\n", cqe_to_fd(cqe), cqe_to_bid(cqe)); 297 + return 1; 298 + } 299 + 300 + /* 301 + * Move error handling out of the normal handling path, cleanly seperating 302 + * them. If an opcode doesn't need any error handling, set it to NULL. If 303 + * it wants to stop the connection at that point and not do anything else, 304 + * then the default handler can be used. Only receive has proper error 305 + * handling, as we can get -ENOBUFS which is not a fatal condition. It just 306 + * means we need to wait on buffer replenishing before re-arming the receive. 
307 + */ 308 + static struct error_handler error_handlers[] = { 309 + { .name = "NULL", .error_fn = NULL, }, 310 + { .name = "ACCEPT", .error_fn = default_error, }, 311 + { .name = "SOCK", .error_fn = default_error, }, 312 + { .name = "CONNECT", .error_fn = default_error, }, 313 + { .name = "RECV", .error_fn = recv_error, }, 314 + { .name = "RECVMSG", .error_fn = recv_error, }, 315 + { .name = "SEND", .error_fn = send_error, }, 316 + { .name = "SENDMSG", .error_fn = send_error, }, 317 + { .name = "SHUTDOWN", .error_fn = NULL, }, 318 + { .name = "CANCEL", .error_fn = NULL, }, 319 + { .name = "CLOSE", .error_fn = NULL, }, 320 + { .name = "FD_PASS", .error_fn = default_error, }, 321 + { .name = "NOP", .error_fn = NULL, }, 322 + { .name = "STOP", .error_fn = default_error, }, 323 + }; 324 + 325 + static void free_buffer_ring(struct io_uring *ring, struct conn_buf_ring *cbr) 326 + { 327 + if (!cbr->br) 328 + return; 329 + 330 + io_uring_free_buf_ring(ring, cbr->br, nr_bufs, cbr->bgid); 331 + cbr->br = NULL; 332 + if (use_huge) 333 + munmap(cbr->buf, buf_size * nr_bufs); 334 + else 335 + free(cbr->buf); 336 + } 337 + 338 + static void free_buffer_rings(struct io_uring *ring, struct conn *c) 339 + { 340 + free_buffer_ring(ring, &c->in_br); 341 + free_buffer_ring(ring, &c->out_br); 342 + } 343 + 344 + /* 345 + * Setup a ring provided buffer ring for each connection. If we get -ENOBUFS 346 + * on receive, for multishot receive we'll wait for half the provided buffers 347 + * to be returned by pending sends, then re-arm the multishot receive. If 348 + * this happens too frequently (see enobufs= stat), then the ring size is 349 + * likely too small. Use -nXX to make it bigger. See recv_enobufs(). 350 + * 351 + * The alternative here would be to use the older style provided buffers, 352 + * where you simply setup a buffer group and use SQEs with 353 + * io_urign_prep_provide_buffers() to add to the pool. 
But that approach is 354 + * slower and has been deprecated by using the faster ring provided buffers. 355 + */ 356 + static int setup_recv_ring(struct io_uring *ring, struct conn *c) 357 + { 358 + struct conn_buf_ring *cbr = &c->in_br; 359 + int ret, i; 360 + size_t len; 361 + void *ptr; 362 + 363 + len = buf_size * nr_bufs; 364 + if (use_huge) { 365 + cbr->buf = mmap(NULL, len, PROT_READ|PROT_WRITE, 366 + MAP_PRIVATE|MAP_HUGETLB|MAP_HUGE_2MB|MAP_ANONYMOUS, 367 + -1, 0); 368 + if (cbr->buf == MAP_FAILED) { 369 + perror("mmap"); 370 + return 1; 371 + } 372 + } else { 373 + if (posix_memalign(&cbr->buf, page_size, len)) { 374 + perror("posix memalign"); 375 + return 1; 376 + } 377 + } 378 + cbr->br = io_uring_setup_buf_ring(ring, nr_bufs, cbr->bgid, 0, &ret); 379 + if (!cbr->br) { 380 + fprintf(stderr, "Buffer ring register failed %d\n", ret); 381 + return 1; 382 + } 383 + 384 + ptr = cbr->buf; 385 + for (i = 0; i < nr_bufs; i++) { 386 + vlog("%d: add bid %d, data %p\n", c->tid, i, ptr); 387 + io_uring_buf_ring_add(cbr->br, ptr, buf_size, i, br_mask, i); 388 + ptr += buf_size; 389 + } 390 + io_uring_buf_ring_advance(cbr->br, nr_bufs); 391 + printf("%d: recv buffer ring bgid %d, bufs %d\n", c->tid, cbr->bgid, nr_bufs); 392 + return 0; 393 + } 394 + 395 + /* 396 + * If 'send_ring' is used and the kernel supports it, we can skip serializing 397 + * sends as the data will be ordered regardless. This reduces the send handling 398 + * complexity, as buffers can always be added to the outgoing ring and will be 399 + * processed in the order in which they were added. 
400 + */ 401 + static int setup_send_ring(struct io_uring *ring, struct conn *c) 402 + { 403 + struct conn_buf_ring *cbr = &c->out_br; 404 + int ret; 405 + 406 + cbr->br = io_uring_setup_buf_ring(ring, nr_bufs, cbr->bgid, 0, &ret); 407 + if (!cbr->br) { 408 + fprintf(stderr, "Buffer ring register failed %d\n", ret); 409 + return 1; 410 + } 411 + 412 + printf("%d: send buffer ring bgid %d, bufs %d\n", c->tid, cbr->bgid, nr_bufs); 413 + return 0; 414 + } 415 + 416 + static int setup_send_zc(struct io_uring *ring, struct conn *c) 417 + { 418 + struct iovec *iovs; 419 + void *buf; 420 + int i, ret; 421 + 422 + if (snd_msg) 423 + return 0; 424 + 425 + buf = c->in_br.buf; 426 + iovs = calloc(nr_bufs, sizeof(struct iovec)); 427 + for (i = 0; i < nr_bufs; i++) { 428 + iovs[i].iov_base = buf; 429 + iovs[i].iov_len = buf_size; 430 + buf += buf_size; 431 + } 432 + 433 + ret = io_uring_register_buffers(ring, iovs, nr_bufs); 434 + if (ret) { 435 + fprintf(stderr, "failed registering buffers: %d\n", ret); 436 + free(iovs); 437 + return ret; 438 + } 439 + free(iovs); 440 + return 0; 441 + } 442 + 443 + /* 444 + * Setup an input and output buffer ring. 
445 + */ 446 + static int setup_buffer_rings(struct io_uring *ring, struct conn *c) 447 + { 448 + int ret; 449 + 450 + /* no locking needed on cur_bgid, parent serializes setup */ 451 + c->in_br.bgid = cur_bgid++; 452 + c->out_br.bgid = cur_bgid++; 453 + c->out_br.br = NULL; 454 + 455 + ret = setup_recv_ring(ring, c); 456 + if (ret) 457 + return ret; 458 + if (is_sink) 459 + return 0; 460 + if (snd_zc) { 461 + ret = setup_send_zc(ring, c); 462 + if (ret) 463 + return ret; 464 + } 465 + if (send_ring) { 466 + ret = setup_send_ring(ring, c); 467 + if (ret) { 468 + free_buffer_ring(ring, &c->in_br); 469 + return ret; 470 + } 471 + } 472 + 473 + return 0; 474 + } 475 + 476 + struct bucket_stat { 477 + int nr_packets; 478 + int count; 479 + }; 480 + 481 + static int stat_cmp(const void *p1, const void *p2) 482 + { 483 + const struct bucket_stat *b1 = p1; 484 + const struct bucket_stat *b2 = p2; 485 + 486 + if (b1->count < b2->count) 487 + return 1; 488 + else if (b1->count > b2->count) 489 + return -1; 490 + return 0; 491 + } 492 + 493 + static void show_buckets(struct conn_dir *cd) 494 + { 495 + unsigned long snd_total, rcv_total; 496 + struct bucket_stat *rstat, *sstat; 497 + int i; 498 + 499 + if (!cd->rcv_bucket || !cd->snd_bucket) 500 + return; 501 + 502 + rstat = calloc(nr_bufs + 1, sizeof(struct bucket_stat)); 503 + sstat = calloc(nr_bufs + 1, sizeof(struct bucket_stat)); 504 + 505 + snd_total = rcv_total = 0; 506 + for (i = 0; i <= nr_bufs; i++) { 507 + snd_total += cd->snd_bucket[i]; 508 + sstat[i].nr_packets = i; 509 + sstat[i].count = cd->snd_bucket[i]; 510 + rcv_total += cd->rcv_bucket[i]; 511 + rstat[i].nr_packets = i; 512 + rstat[i].count = cd->rcv_bucket[i]; 513 + } 514 + 515 + if (!snd_total && !rcv_total) { 516 + free(sstat); 517 + free(rstat); 518 + } 519 + if (snd_total) 520 + qsort(sstat, nr_bufs, sizeof(struct bucket_stat), stat_cmp); 521 + if (rcv_total) 522 + qsort(rstat, nr_bufs, sizeof(struct bucket_stat), stat_cmp); 523 + 524 + printf("\t 
Packets per recv/send:\n"); 525 + for (i = 0; i <= nr_bufs; i++) { 526 + double snd_prc = 0.0, rcv_prc = 0.0; 527 + if (!rstat[i].count && !sstat[i].count) 528 + continue; 529 + if (rstat[i].count) 530 + rcv_prc = 100.0 * (rstat[i].count / (double) rcv_total); 531 + if (sstat[i].count) 532 + snd_prc = 100.0 * (sstat[i].count / (double) snd_total); 533 + printf("\t bucket(%3d/%3d): rcv=%u (%.2f%%) snd=%u (%.2f%%)\n", 534 + rstat[i].nr_packets, sstat[i].nr_packets, 535 + rstat[i].count, rcv_prc, 536 + sstat[i].count, snd_prc); 537 + } 538 + 539 + free(sstat); 540 + free(rstat); 541 + } 542 + 543 + static void __show_stats(struct conn *c) 544 + { 545 + unsigned long msec, qps; 546 + unsigned long bytes, bw; 547 + struct conn_dir *cd; 548 + int i; 549 + 550 + if (c->flags & (CONN_F_STATS_SHOWN | CONN_F_REAPED)) 551 + return; 552 + if (!(c->flags & CONN_F_STARTED)) 553 + return; 554 + 555 + if (!(c->flags & CONN_F_END_TIME)) 556 + gettimeofday(&c->end_time, NULL); 557 + 558 + msec = (c->end_time.tv_sec - c->start_time.tv_sec) * 1000; 559 + msec += (c->end_time.tv_usec - c->start_time.tv_usec) / 1000; 560 + 561 + qps = 0; 562 + for (i = 0; i < 2; i++) 563 + qps += c->cd[i].rcv + c->cd[i].snd; 564 + 565 + if (!qps) 566 + return; 567 + 568 + if (msec) 569 + qps = (qps * 1000) / msec; 570 + 571 + printf("Conn %d/(in_fd=%d, out_fd=%d): qps=%lu, msec=%lu\n", c->tid, 572 + c->in_fd, c->out_fd, qps, msec); 573 + 574 + bytes = 0; 575 + for (i = 0; i < 2; i++) { 576 + cd = &c->cd[i]; 577 + 578 + if (!cd->in_bytes && !cd->out_bytes && !cd->snd && !cd->rcv) 579 + continue; 580 + 581 + bytes += cd->in_bytes; 582 + bytes += cd->out_bytes; 583 + 584 + printf("\t%3d: rcv=%u (short=%u, enobufs=%d), snd=%u (short=%u," 585 + " busy=%u, enobufs=%d)\n", i, cd->rcv, cd->rcv_shrt, 586 + cd->rcv_enobufs, cd->snd, cd->snd_shrt, cd->snd_busy, 587 + cd->snd_enobufs); 588 + printf("\t : in_bytes=%lu (Kb %lu), out_bytes=%lu (Kb %lu)\n", 589 + cd->in_bytes, cd->in_bytes >> 10, 590 + cd->out_bytes, 
cd->out_bytes >> 10); 591 + printf("\t : mshot_rcv=%d, mshot_snd=%d\n", cd->rcv_mshot, 592 + cd->snd_mshot); 593 + show_buckets(cd); 594 + 595 + } 596 + if (msec) { 597 + bytes *= 8UL; 598 + bw = bytes / 1000; 599 + bw /= msec; 600 + printf("\tBW=%'luMbit\n", bw); 601 + } 602 + 603 + c->flags |= CONN_F_STATS_SHOWN; 604 + } 605 + 606 + static void show_stats(void) 607 + { 608 + float events_per_loop = 0.0; 609 + static int stats_shown; 610 + int i; 611 + 612 + if (stats_shown) 613 + return; 614 + 615 + if (events) 616 + events_per_loop = (float) events / (float) event_loops; 617 + 618 + printf("Event loops: %lu, events %lu, events per loop %.2f\n", event_loops, 619 + events, events_per_loop); 620 + 621 + for (i = 0; i < MAX_CONNS; i++) { 622 + struct conn *c = &conns[i]; 623 + 624 + __show_stats(c); 625 + } 626 + stats_shown = 1; 627 + } 628 + 629 + static void sig_int(int __attribute__((__unused__)) sig) 630 + { 631 + printf("\n"); 632 + show_stats(); 633 + exit(1); 634 + } 635 + 636 + /* 637 + * Special cased for SQPOLL only, as we don't control when SQEs are consumed if 638 + * that is used. Hence we may need to wait for the SQPOLL thread to keep up 639 + * until we can get a new SQE. All other cases will break immediately, with a 640 + * fresh SQE. 641 + * 642 + * If we grossly undersized our SQ ring, getting a NULL sqe can happen even 643 + * for the !SQPOLL case if we're handling a lot of CQEs in our event loop 644 + * and multishot isn't used. We can do io_uring_submit() to flush what we 645 + * have here. Only caveat here is that if linked requests are used, SQEs 646 + * would need to be allocated upfront as a link chain is only valid within 647 + * a single submission cycle. 
648 + */ 649 + static struct io_uring_sqe *get_sqe(struct io_uring *ring) 650 + { 651 + struct io_uring_sqe *sqe; 652 + 653 + do { 654 + sqe = io_uring_get_sqe(ring); 655 + if (sqe) 656 + break; 657 + if (!sqpoll) 658 + io_uring_submit(ring); 659 + else 660 + io_uring_sqring_wait(ring); 661 + } while (1); 662 + 663 + return sqe; 664 + } 665 + 666 + /* 667 + * See __encode_userdata() for how we encode sqe->user_data, which is passed 668 + * back as cqe->user_data at completion time. 669 + */ 670 + static void encode_userdata(struct io_uring_sqe *sqe, struct conn *c, int op, 671 + int bid, int fd) 672 + { 673 + __encode_userdata(sqe, c->tid, op, bid, fd); 674 + } 675 + 676 + static void __submit_receive(struct io_uring *ring, struct conn *c, 677 + struct conn_dir *cd, int fd) 678 + { 679 + struct conn_buf_ring *cbr = &c->in_br; 680 + struct io_uring_sqe *sqe; 681 + 682 + vlog("%d: submit receive fd=%d\n", c->tid, fd); 683 + 684 + assert(!cd->pending_recv); 685 + cd->pending_recv = 1; 686 + 687 + /* 688 + * For both recv and multishot receive, we use the ring provided 689 + * buffers. These are handed to the application ahead of time, and 690 + * are consumed when a receive triggers. Note that the address and 691 + * length of the receive are set to NULL/0, and we assign the 692 + * sqe->buf_group to tell the kernel which buffer group ID to pick 693 + * a buffer from. Finally, IOSQE_BUFFER_SELECT is set to tell the 694 + * kernel that we want a buffer picked for this request, we are not 695 + * passing one in with the request. 
696 + */ 697 + sqe = get_sqe(ring); 698 + if (rcv_msg) { 699 + struct io_msg *imsg = &cd->io_rcv_msg; 700 + struct msghdr *msg = &imsg->msg; 701 + 702 + memset(msg, 0, sizeof(*msg)); 703 + msg->msg_iov = msg_vec(imsg)->iov; 704 + msg->msg_iovlen = msg_vec(imsg)->iov_len; 705 + 706 + if (recv_mshot) { 707 + cd->rcv_mshot++; 708 + io_uring_prep_recvmsg_multishot(sqe, fd, &imsg->msg, 0); 709 + } else { 710 + io_uring_prep_recvmsg(sqe, fd, &imsg->msg, 0); 711 + } 712 + } else { 713 + if (recv_mshot) { 714 + cd->rcv_mshot++; 715 + io_uring_prep_recv_multishot(sqe, fd, NULL, 0, 0); 716 + } else { 717 + io_uring_prep_recv(sqe, fd, NULL, 0, 0); 718 + } 719 + } 720 + encode_userdata(sqe, c, __RECV, 0, fd); 721 + sqe->buf_group = cbr->bgid; 722 + sqe->flags |= IOSQE_BUFFER_SELECT; 723 + if (fixed_files) 724 + sqe->flags |= IOSQE_FIXED_FILE; 725 + if (rcv_bundle) 726 + sqe->ioprio |= IORING_RECVSEND_BUNDLE; 727 + } 728 + 729 + /* 730 + * One directional just arms receive on our in_fd 731 + */ 732 + static void submit_receive(struct io_uring *ring, struct conn *c) 733 + { 734 + __submit_receive(ring, c, &c->cd[0], c->in_fd); 735 + } 736 + 737 + /* 738 + * Bi-directional arms receive on both in and out fd 739 + */ 740 + static void submit_bidi_receive(struct io_uring *ring, struct conn *c) 741 + { 742 + __submit_receive(ring, c, &c->cd[0], c->in_fd); 743 + __submit_receive(ring, c, &c->cd[1], c->out_fd); 744 + } 745 + 746 + /* 747 + * We hit -ENOBUFS, which means that we ran out of buffers in our current 748 + * provided buffer group. This can happen if there's an imbalance between the 749 + * receives coming in and the sends being processed, particularly with multishot 750 + * receive as they can trigger very quickly. If this happens, defer arming a 751 + * new receive until we've replenished half of the buffer pool by processing 752 + * pending sends. 
753 + */ 754 + static void recv_enobufs(struct io_uring *ring, struct conn *c, 755 + struct conn_dir *cd, int fd) 756 + { 757 + vlog("%d: enobufs hit\n", c->tid); 758 + 759 + cd->rcv_enobufs++; 760 + 761 + /* 762 + * If we're a sink, mark rcv as rearm. If we're not, then mark us as 763 + * needing a rearm for receive and send. The completing send will 764 + * kick the recv rearm. 765 + */ 766 + if (!is_sink) { 767 + int do_recv_arm = 1; 768 + 769 + if (!cd->pending_send) 770 + do_recv_arm = !prep_next_send(ring, c, cd, fd); 771 + if (do_recv_arm) 772 + __submit_receive(ring, c, &c->cd[0], c->in_fd); 773 + } else { 774 + __submit_receive(ring, c, &c->cd[0], c->in_fd); 775 + } 776 + } 777 + 778 + /* 779 + * Kill this socket - submit a shutdown and link a close to it. We don't 780 + * care about shutdown status, so mark it as not needing to post a CQE unless 781 + * it fails. 782 + */ 783 + static void queue_shutdown_close(struct io_uring *ring, struct conn *c, int fd) 784 + { 785 + struct io_uring_sqe *sqe1, *sqe2; 786 + 787 + /* 788 + * On the off chance that we run out of SQEs after the first one, 789 + * grab two upfront. This it to prevent our link not working if 790 + * get_sqe() ends up doing submissions to free up an SQE, as links 791 + * are not valid across separate submissions. 792 + */ 793 + sqe1 = get_sqe(ring); 794 + sqe2 = get_sqe(ring); 795 + 796 + io_uring_prep_shutdown(sqe1, fd, SHUT_RDWR); 797 + if (fixed_files) 798 + sqe1->flags |= IOSQE_FIXED_FILE; 799 + sqe1->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS; 800 + encode_userdata(sqe1, c, __SHUTDOWN, 0, fd); 801 + 802 + if (fixed_files) 803 + io_uring_prep_close_direct(sqe2, fd); 804 + else 805 + io_uring_prep_close(sqe2, fd); 806 + encode_userdata(sqe2, c, __CLOSE, 0, fd); 807 + } 808 + 809 + /* 810 + * This connection is going away, queue a cancel for any pending recv, for 811 + * example, we have pending for this ring. 
For completeness, we issue a cancel 812 + * for any request we have pending for both in_fd and out_fd. 813 + */ 814 + static void queue_cancel(struct io_uring *ring, struct conn *c) 815 + { 816 + struct io_uring_sqe *sqe; 817 + int flags = 0; 818 + 819 + if (fixed_files) 820 + flags |= IORING_ASYNC_CANCEL_FD_FIXED; 821 + 822 + sqe = get_sqe(ring); 823 + io_uring_prep_cancel_fd(sqe, c->in_fd, flags); 824 + encode_userdata(sqe, c, __CANCEL, 0, c->in_fd); 825 + c->pending_cancels++; 826 + 827 + if (c->out_fd != -1) { 828 + sqe = get_sqe(ring); 829 + io_uring_prep_cancel_fd(sqe, c->out_fd, flags); 830 + encode_userdata(sqe, c, __CANCEL, 0, c->out_fd); 831 + c->pending_cancels++; 832 + } 833 + 834 + io_uring_submit(ring); 835 + } 836 + 837 + static int pending_shutdown(struct conn *c) 838 + { 839 + return c->cd[0].pending_shutdown + c->cd[1].pending_shutdown; 840 + } 841 + 842 + static bool should_shutdown(struct conn *c) 843 + { 844 + int i; 845 + 846 + if (!pending_shutdown(c)) 847 + return false; 848 + if (is_sink) 849 + return true; 850 + if (!bidi) 851 + return c->cd[0].in_bytes == c->cd[1].out_bytes; 852 + 853 + for (i = 0; i < 2; i++) { 854 + if (c->cd[0].rcv != c->cd[1].snd) 855 + return false; 856 + if (c->cd[1].rcv != c->cd[0].snd) 857 + return false; 858 + } 859 + 860 + return true; 861 + } 862 + 863 + /* 864 + * Close this connection - send a ring message to the connection with intent 865 + * to stop. When the client gets the message, it will initiate the stop. 
866 + */ 867 + static void __close_conn(struct io_uring *ring, struct conn *c) 868 + { 869 + struct io_uring_sqe *sqe; 870 + uint64_t user_data; 871 + 872 + printf("Client %d: queueing stop\n", c->tid); 873 + 874 + user_data = __raw_encode(c->tid, __STOP, 0, 0); 875 + sqe = io_uring_get_sqe(ring); 876 + io_uring_prep_msg_ring(sqe, c->ring.ring_fd, 0, user_data, 0); 877 + encode_userdata(sqe, c, __NOP, 0, 0); 878 + io_uring_submit(ring); 879 + } 880 + 881 + static void close_cd(struct conn *c, struct conn_dir *cd) 882 + { 883 + cd->pending_shutdown = 1; 884 + 885 + if (cd->pending_send) 886 + return; 887 + 888 + if (!(c->flags & CONN_F_PENDING_SHUTDOWN)) { 889 + gettimeofday(&c->end_time, NULL); 890 + c->flags |= CONN_F_PENDING_SHUTDOWN | CONN_F_END_TIME; 891 + } 892 + } 893 + 894 + /* 895 + * We're done with this buffer, add it back to our pool so the kernel is 896 + * free to use it again. 897 + */ 898 + static int replenish_buffer(struct conn_buf_ring *cbr, int bid, int offset) 899 + { 900 + void *this_buf = cbr->buf + bid * buf_size; 901 + 902 + assert(bid < nr_bufs); 903 + 904 + io_uring_buf_ring_add(cbr->br, this_buf, buf_size, bid, br_mask, offset); 905 + return buf_size; 906 + } 907 + 908 + /* 909 + * Iterate buffers from '*bid' and with a total size of 'bytes' and add them 910 + * back to our receive ring so they can be reused for new receives. 
911 + */ 912 + static int replenish_buffers(struct conn *c, int *bid, int bytes) 913 + { 914 + struct conn_buf_ring *cbr = &c->in_br; 915 + int nr_packets = 0; 916 + 917 + while (bytes) { 918 + int this_len = replenish_buffer(cbr, *bid, nr_packets); 919 + 920 + if (this_len > bytes) 921 + this_len = bytes; 922 + bytes -= this_len; 923 + 924 + *bid = (*bid + 1) & (nr_bufs - 1); 925 + nr_packets++; 926 + } 927 + 928 + io_uring_buf_ring_advance(cbr->br, nr_packets); 929 + return nr_packets; 930 + } 931 + 932 + static void free_mvec(struct msg_vec *mvec) 933 + { 934 + free(mvec->iov); 935 + mvec->iov = NULL; 936 + } 937 + 938 + static void init_mvec(struct msg_vec *mvec) 939 + { 940 + memset(mvec, 0, sizeof(*mvec)); 941 + mvec->iov = malloc(sizeof(struct iovec)); 942 + mvec->vec_size = 1; 943 + } 944 + 945 + static void init_msgs(struct conn_dir *cd) 946 + { 947 + memset(&cd->io_snd_msg, 0, sizeof(cd->io_snd_msg)); 948 + memset(&cd->io_rcv_msg, 0, sizeof(cd->io_rcv_msg)); 949 + init_mvec(&cd->io_snd_msg.vecs[0]); 950 + init_mvec(&cd->io_snd_msg.vecs[1]); 951 + init_mvec(&cd->io_rcv_msg.vecs[0]); 952 + } 953 + 954 + static void free_msgs(struct conn_dir *cd) 955 + { 956 + free_mvec(&cd->io_snd_msg.vecs[0]); 957 + free_mvec(&cd->io_snd_msg.vecs[1]); 958 + free_mvec(&cd->io_rcv_msg.vecs[0]); 959 + } 960 + 961 + /* 962 + * Multishot accept completion triggered. If we're acting as a sink, we're 963 + * good to go. Just issue a receive for that case. If we're acting as a proxy, 964 + * then start opening a socket that we can use to connect to the other end. 
965 + */ 966 + static int handle_accept(struct io_uring *ring, struct io_uring_cqe *cqe) 967 + { 968 + struct conn *c; 969 + int i; 970 + 971 + if (nr_conns == MAX_CONNS) { 972 + fprintf(stderr, "max clients reached %d\n", nr_conns); 973 + return 1; 974 + } 975 + 976 + /* main thread handles this, which is obviously serialized */ 977 + c = &conns[nr_conns]; 978 + c->tid = nr_conns++; 979 + c->in_fd = -1; 980 + c->out_fd = -1; 981 + 982 + for (i = 0; i < 2; i++) { 983 + struct conn_dir *cd = &c->cd[i]; 984 + 985 + cd->index = i; 986 + cd->snd_next_bid = -1; 987 + cd->rcv_next_bid = -1; 988 + if (ext_stat) { 989 + cd->rcv_bucket = calloc(nr_bufs + 1, sizeof(int)); 990 + cd->snd_bucket = calloc(nr_bufs + 1, sizeof(int)); 991 + } 992 + init_msgs(cd); 993 + } 994 + 995 + printf("New client: id=%d, in=%d\n", c->tid, c->in_fd); 996 + gettimeofday(&c->start_time, NULL); 997 + 998 + pthread_barrier_init(&c->startup_barrier, NULL, 2); 999 + pthread_create(&c->thread, NULL, thread_main, c); 1000 + 1001 + /* 1002 + * Wait for thread to have its ring setup, then either assign the fd 1003 + * if it's non-fixed, or pass the fixed one 1004 + */ 1005 + pthread_barrier_wait(&c->startup_barrier); 1006 + if (!fixed_files) { 1007 + c->in_fd = cqe->res; 1008 + } else { 1009 + struct io_uring_sqe *sqe; 1010 + uint64_t user_data; 1011 + 1012 + /* 1013 + * Ring has just been setup, we'll use index 0 as the descriptor 1014 + * value. 1015 + */ 1016 + user_data = __raw_encode(c->tid, __FD_PASS, 0, 0); 1017 + sqe = io_uring_get_sqe(ring); 1018 + io_uring_prep_msg_ring_fd(sqe, c->ring.ring_fd, cqe->res, 0, 1019 + user_data, 0); 1020 + encode_userdata(sqe, c, __NOP, 0, cqe->res); 1021 + } 1022 + 1023 + return 0; 1024 + } 1025 + 1026 + /* 1027 + * Our socket request completed, issue a connect request to the other end. 
1028 + */ 1029 + static int handle_sock(struct io_uring *ring, struct io_uring_cqe *cqe) 1030 + { 1031 + struct conn *c = cqe_to_conn(cqe); 1032 + struct io_uring_sqe *sqe; 1033 + int ret; 1034 + 1035 + vlog("%d: sock: res=%d\n", c->tid, cqe->res); 1036 + 1037 + c->out_fd = cqe->res; 1038 + 1039 + if (ipv6) { 1040 + memset(&c->addr6, 0, sizeof(c->addr6)); 1041 + c->addr6.sin6_family = AF_INET6; 1042 + c->addr6.sin6_port = htons(send_port); 1043 + ret = inet_pton(AF_INET6, host, &c->addr6.sin6_addr); 1044 + } else { 1045 + memset(&c->addr, 0, sizeof(c->addr)); 1046 + c->addr.sin_family = AF_INET; 1047 + c->addr.sin_port = htons(send_port); 1048 + ret = inet_pton(AF_INET, host, &c->addr.sin_addr); 1049 + } 1050 + if (ret <= 0) { 1051 + if (!ret) 1052 + fprintf(stderr, "host not in right format\n"); 1053 + else 1054 + perror("inet_pton"); 1055 + return 1; 1056 + } 1057 + 1058 + sqe = get_sqe(ring); 1059 + if (ipv6) { 1060 + io_uring_prep_connect(sqe, c->out_fd, 1061 + (struct sockaddr *) &c->addr6, 1062 + sizeof(c->addr6)); 1063 + } else { 1064 + io_uring_prep_connect(sqe, c->out_fd, 1065 + (struct sockaddr *) &c->addr, 1066 + sizeof(c->addr)); 1067 + } 1068 + encode_userdata(sqe, c, __CONNECT, 0, c->out_fd); 1069 + if (fixed_files) 1070 + sqe->flags |= IOSQE_FIXED_FILE; 1071 + return 0; 1072 + } 1073 + 1074 + /* 1075 + * Connection to the other end is done, submit a receive to start receiving 1076 + * data. If we're a bidirectional proxy, issue a receive on both ends. If not, 1077 + * then just a single recv will do. 
1078 + */ 1079 + static int handle_connect(struct io_uring *ring, struct io_uring_cqe *cqe) 1080 + { 1081 + struct conn *c = cqe_to_conn(cqe); 1082 + 1083 + pthread_mutex_lock(&thread_lock); 1084 + open_conns++; 1085 + pthread_mutex_unlock(&thread_lock); 1086 + 1087 + if (bidi) 1088 + submit_bidi_receive(ring, c); 1089 + else 1090 + submit_receive(ring, c); 1091 + 1092 + return 0; 1093 + } 1094 + 1095 + /* 1096 + * Append new segment to our currently active msg_vec. This will be submitted 1097 + * as a sendmsg (with all of it), or as separate sends, later. If we're using 1098 + * send_ring, then we won't hit this path. Instead, outgoing buffers are 1099 + * added directly to our outgoing send buffer ring. 1100 + */ 1101 + static void send_append_vec(struct conn_dir *cd, void *data, int len) 1102 + { 1103 + struct msg_vec *mvec = snd_msg_vec(cd); 1104 + 1105 + if (mvec->iov_len == mvec->vec_size) { 1106 + mvec->vec_size <<= 1; 1107 + mvec->iov = realloc(mvec->iov, mvec->vec_size * sizeof(struct iovec)); 1108 + } 1109 + 1110 + mvec->iov[mvec->iov_len].iov_base = data; 1111 + mvec->iov[mvec->iov_len].iov_len = len; 1112 + mvec->iov_len++; 1113 + } 1114 + 1115 + /* 1116 + * Queue a send based on the data received in this cqe, which came from 1117 + * a completed receive operation. 1118 + */ 1119 + static void send_append(struct conn *c, struct conn_dir *cd, void *data, 1120 + int bid, int len) 1121 + { 1122 + vlog("%d: send %d (%p, bid %d)\n", c->tid, len, data, bid); 1123 + 1124 + assert(bid < nr_bufs); 1125 + 1126 + /* if using provided buffers for send, add it upfront */ 1127 + if (send_ring) { 1128 + struct conn_buf_ring *cbr = &c->out_br; 1129 + 1130 + io_uring_buf_ring_add(cbr->br, data, len, bid, br_mask, 0); 1131 + io_uring_buf_ring_advance(cbr->br, 1); 1132 + } else { 1133 + send_append_vec(cd, data, len); 1134 + } 1135 + } 1136 + 1137 + /* 1138 + * For non recvmsg && multishot, a zero receive marks the end. 
For recvmsg 1139 + * with multishot, we always get the header regardless. Hence a "zero receive" 1140 + * is the size of the header. 1141 + */ 1142 + static int recv_done_res(int res) 1143 + { 1144 + if (!res) 1145 + return 1; 1146 + if (rcv_msg && recv_mshot && res == sizeof(struct io_uring_recvmsg_out)) 1147 + return 1; 1148 + return 0; 1149 + } 1150 + 1151 + /* 1152 + * Any receive that isn't recvmsg with multishot can be handled the same way. 1153 + * Iterate from '*bid' and 'in_bytes' in total, and append the data to the 1154 + * outgoing queue. 1155 + */ 1156 + static int recv_bids(struct conn *c, struct conn_dir *cd, int *bid, int in_bytes) 1157 + { 1158 + struct conn_buf_ring *cbr = &c->out_br; 1159 + struct conn_buf_ring *in_cbr = &c->in_br; 1160 + struct io_uring_buf *buf; 1161 + int nr_packets = 0; 1162 + 1163 + while (in_bytes) { 1164 + int this_bytes; 1165 + void *data; 1166 + 1167 + buf = &in_cbr->br->bufs[*bid]; 1168 + data = (void *) (unsigned long) buf->addr; 1169 + this_bytes = buf->len; 1170 + if (this_bytes > in_bytes) 1171 + this_bytes = in_bytes; 1172 + 1173 + in_bytes -= this_bytes; 1174 + 1175 + if (send_ring) 1176 + io_uring_buf_ring_add(cbr->br, data, this_bytes, *bid, 1177 + br_mask, nr_packets); 1178 + else 1179 + send_append(c, cd, data, *bid, this_bytes); 1180 + 1181 + *bid = (*bid + 1) & (nr_bufs - 1); 1182 + nr_packets++; 1183 + } 1184 + 1185 + if (send_ring) 1186 + io_uring_buf_ring_advance(cbr->br, nr_packets); 1187 + 1188 + return nr_packets; 1189 + } 1190 + 1191 + /* 1192 + * Special handling of recvmsg with multishot 1193 + */ 1194 + static int recv_mshot_msg(struct conn *c, struct conn_dir *cd, int *bid, 1195 + int in_bytes) 1196 + { 1197 + struct conn_buf_ring *cbr = &c->out_br; 1198 + struct conn_buf_ring *in_cbr = &c->in_br; 1199 + struct io_uring_buf *buf; 1200 + int nr_packets = 0; 1201 + 1202 + while (in_bytes) { 1203 + struct io_uring_recvmsg_out *pdu; 1204 + int this_bytes; 1205 + void *data; 1206 + 1207 + buf = 
&in_cbr->br->bufs[*bid]; 1208 + 1209 + /* 1210 + * multishot recvmsg puts a header in front of the data - we 1211 + * have to take that into account for the send setup, and 1212 + * adjust the actual data read to not take this metadata into 1213 + * account. For this use case, namelen and controllen will not 1214 + * be set. If they were, they would need to be factored in too. 1215 + */ 1216 + buf->len -= sizeof(struct io_uring_recvmsg_out); 1217 + in_bytes -= sizeof(struct io_uring_recvmsg_out); 1218 + 1219 + pdu = (void *) (unsigned long) buf->addr; 1220 + vlog("pdu namelen %d, controllen %d, payload %d flags %x\n", 1221 + pdu->namelen, pdu->controllen, pdu->payloadlen, 1222 + pdu->flags); 1223 + data = (void *) (pdu + 1); 1224 + 1225 + this_bytes = pdu->payloadlen; 1226 + if (this_bytes > in_bytes) 1227 + this_bytes = in_bytes; 1228 + 1229 + in_bytes -= this_bytes; 1230 + 1231 + if (send_ring) 1232 + io_uring_buf_ring_add(cbr->br, data, this_bytes, *bid, 1233 + br_mask, nr_packets); 1234 + else 1235 + send_append(c, cd, data, *bid, this_bytes); 1236 + 1237 + *bid = (*bid + 1) & (nr_bufs - 1); 1238 + nr_packets++; 1239 + } 1240 + 1241 + if (send_ring) 1242 + io_uring_buf_ring_advance(cbr->br, nr_packets); 1243 + 1244 + return nr_packets; 1245 + } 1246 + 1247 + static int __handle_recv(struct io_uring *ring, struct conn *c, 1248 + struct conn_dir *cd, struct io_uring_cqe *cqe) 1249 + { 1250 + struct conn_dir *ocd = &c->cd[!cd->index]; 1251 + int bid, nr_packets; 1252 + 1253 + /* 1254 + * Not having a buffer attached should only happen if we get a zero 1255 + * sized receive, because the other end closed the connection. It 1256 + * cannot happen otherwise, as all our receives are using provided 1257 + * buffers and hence it's not possible to return a CQE with a non-zero 1258 + * result and not have a buffer attached. 
1259 + */ 1260 + if (!(cqe->flags & IORING_CQE_F_BUFFER)) { 1261 + cd->pending_recv = 0; 1262 + 1263 + if (!recv_done_res(cqe->res)) { 1264 + fprintf(stderr, "no buffer assigned, res=%d\n", cqe->res); 1265 + return 1; 1266 + } 1267 + start_close: 1268 + prep_next_send(ring, c, ocd, other_dir_fd(c, cqe_to_fd(cqe))); 1269 + close_cd(c, cd); 1270 + return 0; 1271 + } 1272 + 1273 + if (cqe->res && cqe->res < buf_size) 1274 + cd->rcv_shrt++; 1275 + 1276 + bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT; 1277 + 1278 + /* 1279 + * BIDI will use the same buffer pool and do receive on both CDs, 1280 + * so can't reliably check. TODO. 1281 + */ 1282 + if (!bidi && cd->rcv_next_bid != -1 && bid != cd->rcv_next_bid) { 1283 + fprintf(stderr, "recv bid %d, wanted %d\n", bid, cd->rcv_next_bid); 1284 + goto start_close; 1285 + } 1286 + 1287 + vlog("%d: recv: bid=%d, res=%d, cflags=%x\n", c->tid, bid, cqe->res, cqe->flags); 1288 + /* 1289 + * If we're a sink, we're done here. Just replenish the buffer back 1290 + * to the pool. For proxy mode, we will send the data to the other 1291 + * end and the buffer will be replenished once the send is done with 1292 + * it. 1293 + */ 1294 + if (is_sink) 1295 + nr_packets = replenish_buffers(c, &bid, cqe->res); 1296 + else if (rcv_msg && recv_mshot) 1297 + nr_packets = recv_mshot_msg(c, ocd, &bid, cqe->res); 1298 + else 1299 + nr_packets = recv_bids(c, ocd, &bid, cqe->res); 1300 + 1301 + if (cd->rcv_bucket) 1302 + cd->rcv_bucket[nr_packets]++; 1303 + 1304 + if (!is_sink) { 1305 + ocd->out_buffers += nr_packets; 1306 + assert(ocd->out_buffers <= nr_bufs); 1307 + } 1308 + 1309 + cd->rcv++; 1310 + cd->rcv_next_bid = bid; 1311 + 1312 + /* 1313 + * If IORING_CQE_F_MORE isn't set, then this is either a normal recv 1314 + * that needs rearming, or it's a multishot that won't post any further 1315 + * completions. Setup a new one for these cases. 
1316 + */ 1317 + if (!(cqe->flags & IORING_CQE_F_MORE)) { 1318 + cd->pending_recv = 0; 1319 + if (recv_done_res(cqe->res)) 1320 + goto start_close; 1321 + if (is_sink) 1322 + __submit_receive(ring, c, &c->cd[0], c->in_fd); 1323 + } 1324 + 1325 + /* 1326 + * Submit a send if we won't get anymore notifications from this 1327 + * recv, or if we have nr_bufs / 2 queued up. If BIDI mode, send 1328 + * every buffer. We assume this is interactive mode, and hence don't 1329 + * delay anything. 1330 + */ 1331 + if (((!ocd->pending_send && (bidi || (ocd->out_buffers >= nr_bufs / 2))) || 1332 + !(cqe->flags & IORING_CQE_F_MORE)) && !is_sink) 1333 + prep_next_send(ring, c, ocd, other_dir_fd(c, cqe_to_fd(cqe))); 1334 + 1335 + if (!recv_done_res(cqe->res)) 1336 + cd->in_bytes += cqe->res; 1337 + return 0; 1338 + } 1339 + 1340 + static int handle_recv(struct io_uring *ring, struct io_uring_cqe *cqe) 1341 + { 1342 + struct conn *c = cqe_to_conn(cqe); 1343 + struct conn_dir *cd = cqe_to_conn_dir(c, cqe); 1344 + 1345 + return __handle_recv(ring, c, cd, cqe); 1346 + } 1347 + 1348 + static int recv_error(struct error_handler *err, struct io_uring *ring, 1349 + struct io_uring_cqe *cqe) 1350 + { 1351 + struct conn *c = cqe_to_conn(cqe); 1352 + struct conn_dir *cd = cqe_to_conn_dir(c, cqe); 1353 + 1354 + cd->pending_recv = 0; 1355 + 1356 + if (cqe->res != -ENOBUFS) 1357 + return default_error(err, ring, cqe); 1358 + 1359 + recv_enobufs(ring, c, cd, other_dir_fd(c, cqe_to_fd(cqe))); 1360 + return 0; 1361 + } 1362 + 1363 + static void submit_send(struct io_uring *ring, struct conn *c, 1364 + struct conn_dir *cd, int fd, void *data, int len, 1365 + int bid, int flags) 1366 + { 1367 + struct io_uring_sqe *sqe; 1368 + int bgid = c->out_br.bgid; 1369 + 1370 + if (cd->pending_send) 1371 + return; 1372 + cd->pending_send = 1; 1373 + 1374 + flags |= MSG_WAITALL | MSG_NOSIGNAL; 1375 + 1376 + sqe = get_sqe(ring); 1377 + if (snd_msg) { 1378 + struct io_msg *imsg = &cd->io_snd_msg; 1379 + 1380 + if 
(snd_zc) { 1381 + io_uring_prep_sendmsg_zc(sqe, fd, &imsg->msg, flags); 1382 + cd->snd_notif++; 1383 + } else { 1384 + io_uring_prep_sendmsg(sqe, fd, &imsg->msg, flags); 1385 + } 1386 + } else if (send_ring) { 1387 + io_uring_prep_send(sqe, fd, NULL, 0, flags); 1388 + } else if (!snd_zc) { 1389 + io_uring_prep_send(sqe, fd, data, len, flags); 1390 + } else { 1391 + io_uring_prep_send_zc(sqe, fd, data, len, flags, 0); 1392 + sqe->ioprio |= IORING_RECVSEND_FIXED_BUF; 1393 + sqe->buf_index = bid; 1394 + cd->snd_notif++; 1395 + } 1396 + encode_userdata(sqe, c, __SEND, bid, fd); 1397 + if (fixed_files) 1398 + sqe->flags |= IOSQE_FIXED_FILE; 1399 + if (send_ring) { 1400 + sqe->flags |= IOSQE_BUFFER_SELECT; 1401 + sqe->buf_group = bgid; 1402 + } 1403 + if (snd_bundle) { 1404 + sqe->ioprio |= IORING_RECVSEND_BUNDLE; 1405 + cd->snd_mshot++; 1406 + } else if (send_ring) 1407 + cd->snd_mshot++; 1408 + } 1409 + 1410 + /* 1411 + * Prepare the next send request, if we need to. If one is already pending, 1412 + * or if we're a sink and we don't need to do sends, then there's nothing 1413 + * to do. 1414 + * 1415 + * Return 1 if another send completion is expected, 0 if not. 1416 + */ 1417 + static int prep_next_send(struct io_uring *ring, struct conn *c, 1418 + struct conn_dir *cd, int fd) 1419 + { 1420 + int bid; 1421 + 1422 + if (cd->pending_send || is_sink) 1423 + return 0; 1424 + if (!cd->out_buffers) 1425 + return 0; 1426 + 1427 + bid = cd->snd_next_bid; 1428 + if (bid == -1) 1429 + bid = 0; 1430 + 1431 + if (send_ring) { 1432 + /* 1433 + * send_ring mode is easy, there's nothing to do but submit 1434 + * our next send request. That will empty the entire outgoing 1435 + * queue. 1436 + */ 1437 + submit_send(ring, c, cd, fd, NULL, 0, bid, 0); 1438 + return 1; 1439 + } else if (snd_msg) { 1440 + /* 1441 + * For sendmsg mode, submit our currently prepared iovec, if 1442 + * we have one, and swap our iovecs so that any further 1443 + * receives will start preparing that one. 
1444 + */ 1445 + struct io_msg *imsg = &cd->io_snd_msg; 1446 + 1447 + if (!msg_vec(imsg)->iov_len) 1448 + return 0; 1449 + imsg->msg.msg_iov = msg_vec(imsg)->iov; 1450 + imsg->msg.msg_iovlen = msg_vec(imsg)->iov_len; 1451 + msg_vec(imsg)->iov_len = 0; 1452 + imsg->vec_index = !imsg->vec_index; 1453 + submit_send(ring, c, cd, fd, NULL, 0, bid, 0); 1454 + return 1; 1455 + } else { 1456 + /* 1457 + * send without send_ring - submit the next available vec, 1458 + * if any. If this vec is the last one in the current series, 1459 + * then swap to the next vec. We flag each send with MSG_MORE, 1460 + * unless this is the last part of the current vec. 1461 + */ 1462 + struct io_msg *imsg = &cd->io_snd_msg; 1463 + struct msg_vec *mvec = msg_vec(imsg); 1464 + int flags = !snd_zc ? MSG_MORE : 0; 1465 + struct iovec *iov; 1466 + 1467 + if (mvec->iov_len == mvec->cur_iov) 1468 + return 0; 1469 + imsg->msg.msg_iov = msg_vec(imsg)->iov; 1470 + iov = &mvec->iov[mvec->cur_iov]; 1471 + mvec->cur_iov++; 1472 + if (mvec->cur_iov == mvec->iov_len) { 1473 + mvec->iov_len = 0; 1474 + mvec->cur_iov = 0; 1475 + imsg->vec_index = !imsg->vec_index; 1476 + flags = 0; 1477 + } 1478 + submit_send(ring, c, cd, fd, iov->iov_base, iov->iov_len, bid, flags); 1479 + return 1; 1480 + } 1481 + } 1482 + 1483 + /* 1484 + * Handling a send with an outgoing send ring. Get the buffers from the 1485 + * receive side, and add them to the ingoing buffer ring again. 
1486 + */ 1487 + static int handle_send_ring(struct conn *c, struct conn_dir *cd, 1488 + int bid, int bytes) 1489 + { 1490 + struct conn_buf_ring *in_cbr = &c->in_br; 1491 + struct conn_buf_ring *out_cbr = &c->out_br; 1492 + int i = 0; 1493 + 1494 + while (bytes) { 1495 + struct io_uring_buf *buf = &out_cbr->br->bufs[bid]; 1496 + int this_bytes; 1497 + void *this_buf; 1498 + 1499 + this_bytes = buf->len; 1500 + if (this_bytes > bytes) 1501 + this_bytes = bytes; 1502 + 1503 + cd->out_bytes += this_bytes; 1504 + 1505 + vlog("%d: send: bid=%d, len=%d\n", c->tid, bid, this_bytes); 1506 + 1507 + this_buf = in_cbr->buf + bid * buf_size; 1508 + io_uring_buf_ring_add(in_cbr->br, this_buf, buf_size, bid, br_mask, i); 1509 + /* 1510 + * Find the provided buffer that the receive consumed, and 1511 + * which we then used for the send, and add it back to the 1512 + * pool so it can get picked by another receive. Once the send 1513 + * is done, we're done with it. 1514 + */ 1515 + bid = (bid + 1) & (nr_bufs - 1); 1516 + bytes -= this_bytes; 1517 + i++; 1518 + } 1519 + cd->snd_next_bid = bid; 1520 + io_uring_buf_ring_advance(in_cbr->br, i); 1521 + 1522 + if (pending_shutdown(c)) 1523 + close_cd(c, cd); 1524 + 1525 + return i; 1526 + } 1527 + 1528 + /* 1529 + * sendmsg, or send without a ring. Just add buffers back to the ingoing 1530 + * ring for receives. 
1531 + */ 1532 + static int handle_send_buf(struct conn *c, struct conn_dir *cd, int bid, 1533 + int bytes) 1534 + { 1535 + struct conn_buf_ring *in_cbr = &c->in_br; 1536 + int i = 0; 1537 + 1538 + while (bytes) { 1539 + struct io_uring_buf *buf = &in_cbr->br->bufs[bid]; 1540 + int this_bytes; 1541 + 1542 + this_bytes = bytes; 1543 + if (this_bytes > buf->len) 1544 + this_bytes = buf->len; 1545 + 1546 + vlog("%d: send: bid=%d, len=%d\n", c->tid, bid, this_bytes); 1547 + 1548 + cd->out_bytes += this_bytes; 1549 + /* each recvmsg mshot package has this overhead */ 1550 + if (rcv_msg && recv_mshot) 1551 + cd->out_bytes += sizeof(struct io_uring_recvmsg_out); 1552 + replenish_buffer(in_cbr, bid, i); 1553 + bid = (bid + 1) & (nr_bufs - 1); 1554 + bytes -= this_bytes; 1555 + i++; 1556 + } 1557 + io_uring_buf_ring_advance(in_cbr->br, i); 1558 + cd->snd_next_bid = bid; 1559 + return i; 1560 + } 1561 + 1562 + static int __handle_send(struct io_uring *ring, struct conn *c, 1563 + struct conn_dir *cd, struct io_uring_cqe *cqe) 1564 + { 1565 + struct conn_dir *ocd; 1566 + int bid, nr_packets; 1567 + 1568 + if (send_ring) { 1569 + if (!(cqe->flags & IORING_CQE_F_BUFFER)) { 1570 + fprintf(stderr, "no buffer in send?! %d\n", cqe->res); 1571 + return 1; 1572 + } 1573 + bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT; 1574 + } else { 1575 + bid = cqe_to_bid(cqe); 1576 + } 1577 + 1578 + /* 1579 + * CQE notifications only happen with send/sendmsg zerocopy. They 1580 + * tell us that the data has been acked, and that hence the buffer 1581 + * is now free to reuse. Waiting on an ACK for each packet will slow 1582 + * us down tremendously, so do all of our sends and then wait for 1583 + * the ACKs to come in. They tend to come in bundles anyway. Once 1584 + * all acks are done (cd->snd_notif == 0), then fire off the next 1585 + * receive. 
1586 + */ 1587 + if (cqe->flags & IORING_CQE_F_NOTIF) { 1588 + cd->snd_notif--; 1589 + } else { 1590 + if (cqe->res && cqe->res < buf_size) 1591 + cd->snd_shrt++; 1592 + 1593 + /* 1594 + * BIDI will use the same buffer pool and do sends on both CDs, 1595 + * so can't reliably check. TODO. 1596 + */ 1597 + if (!bidi && send_ring && cd->snd_next_bid != -1 && 1598 + bid != cd->snd_next_bid) { 1599 + fprintf(stderr, "send bid %d, wanted %d at %lu\n", bid, 1600 + cd->snd_next_bid, cd->out_bytes); 1601 + goto out_close; 1602 + } 1603 + 1604 + assert(bid <= nr_bufs); 1605 + 1606 + vlog("send: got %d, %lu\n", cqe->res, cd->out_bytes); 1607 + 1608 + if (send_ring) 1609 + nr_packets = handle_send_ring(c, cd, bid, cqe->res); 1610 + else 1611 + nr_packets = handle_send_buf(c, cd, bid, cqe->res); 1612 + 1613 + if (cd->snd_bucket) 1614 + cd->snd_bucket[nr_packets]++; 1615 + 1616 + cd->out_buffers -= nr_packets; 1617 + assert(cd->out_buffers >= 0); 1618 + 1619 + cd->snd++; 1620 + } 1621 + 1622 + if (!(cqe->flags & IORING_CQE_F_MORE)) { 1623 + int do_recv_arm; 1624 + 1625 + cd->pending_send = 0; 1626 + 1627 + /* 1628 + * send done - see if the current vec has data to submit, and 1629 + * do so if it does. if it doesn't have data yet, nothing to 1630 + * do. 
1631 + */ 1632 + do_recv_arm = !prep_next_send(ring, c, cd, cqe_to_fd(cqe)); 1633 + 1634 + ocd = &c->cd[!cd->index]; 1635 + if (!cd->snd_notif && do_recv_arm && !ocd->pending_recv) { 1636 + int fd = other_dir_fd(c, cqe_to_fd(cqe)); 1637 + 1638 + __submit_receive(ring, c, ocd, fd); 1639 + } 1640 + out_close: 1641 + if (pending_shutdown(c)) 1642 + close_cd(c, cd); 1643 + } 1644 + 1645 + vlog("%d: pending sends %d\n", c->tid, cd->pending_send); 1646 + return 0; 1647 + } 1648 + 1649 + static int handle_send(struct io_uring *ring, struct io_uring_cqe *cqe) 1650 + { 1651 + struct conn *c = cqe_to_conn(cqe); 1652 + struct conn_dir *cd = cqe_to_conn_dir(c, cqe); 1653 + 1654 + return __handle_send(ring, c, cd, cqe); 1655 + } 1656 + 1657 + static int send_error(struct error_handler *err, struct io_uring *ring, 1658 + struct io_uring_cqe *cqe) 1659 + { 1660 + struct conn *c = cqe_to_conn(cqe); 1661 + struct conn_dir *cd = cqe_to_conn_dir(c, cqe); 1662 + 1663 + cd->pending_send = 0; 1664 + 1665 + /* res can have high bit set */ 1666 + if (cqe->flags & IORING_CQE_F_NOTIF) 1667 + return handle_send(ring, cqe); 1668 + if (cqe->res != -ENOBUFS) 1669 + return default_error(err, ring, cqe); 1670 + 1671 + cd->snd_enobufs++; 1672 + return 0; 1673 + } 1674 + 1675 + /* 1676 + * We don't expect to get here, as we marked it with skipping posting a 1677 + * CQE if it was successful. If it does trigger, than means it fails and 1678 + * that our close has not been done. Log the shutdown error and issue a new 1679 + * separate close. 
1680 + */ 1681 + static int handle_shutdown(struct io_uring *ring, struct io_uring_cqe *cqe) 1682 + { 1683 + struct conn *c = cqe_to_conn(cqe); 1684 + struct io_uring_sqe *sqe; 1685 + int fd = cqe_to_fd(cqe); 1686 + 1687 + fprintf(stderr, "Got shutdown notication on fd %d\n", fd); 1688 + 1689 + if (!cqe->res) 1690 + fprintf(stderr, "Unexpected success shutdown CQE\n"); 1691 + else if (cqe->res < 0) 1692 + fprintf(stderr, "Shutdown got %s\n", strerror(-cqe->res)); 1693 + 1694 + sqe = get_sqe(ring); 1695 + if (fixed_files) 1696 + io_uring_prep_close_direct(sqe, fd); 1697 + else 1698 + io_uring_prep_close(sqe, fd); 1699 + encode_userdata(sqe, c, __CLOSE, 0, fd); 1700 + return 0; 1701 + } 1702 + 1703 + /* 1704 + * Final stage of a connection, the shutdown and close has finished. Mark 1705 + * it as disconnected and let the main loop reap it. 1706 + */ 1707 + static int handle_close(struct io_uring *ring, struct io_uring_cqe *cqe) 1708 + { 1709 + struct conn *c = cqe_to_conn(cqe); 1710 + int fd = cqe_to_fd(cqe); 1711 + 1712 + printf("Closed client: id=%d, in_fd=%d, out_fd=%d\n", c->tid, c->in_fd, c->out_fd); 1713 + if (fd == c->in_fd) 1714 + c->in_fd = -1; 1715 + else if (fd == c->out_fd) 1716 + c->out_fd = -1; 1717 + 1718 + if (c->in_fd == -1 && c->out_fd == -1) { 1719 + c->flags |= CONN_F_DISCONNECTED; 1720 + 1721 + pthread_mutex_lock(&thread_lock); 1722 + __show_stats(c); 1723 + open_conns--; 1724 + pthread_mutex_unlock(&thread_lock); 1725 + free_buffer_rings(ring, c); 1726 + free_msgs(&c->cd[0]); 1727 + free_msgs(&c->cd[1]); 1728 + free(c->cd[0].rcv_bucket); 1729 + free(c->cd[0].snd_bucket); 1730 + } 1731 + 1732 + return 0; 1733 + } 1734 + 1735 + static int handle_cancel(struct io_uring *ring, struct io_uring_cqe *cqe) 1736 + { 1737 + struct conn *c = cqe_to_conn(cqe); 1738 + int fd = cqe_to_fd(cqe); 1739 + 1740 + c->pending_cancels--; 1741 + 1742 + vlog("%d: got cancel fd %d, refs %d\n", c->tid, fd, c->pending_cancels); 1743 + 1744 + if (!c->pending_cancels) { 1745 
+ queue_shutdown_close(ring, c, c->in_fd); 1746 + if (c->out_fd != -1) 1747 + queue_shutdown_close(ring, c, c->out_fd); 1748 + io_uring_submit(ring); 1749 + } 1750 + 1751 + return 0; 1752 + } 1753 + 1754 + static void open_socket(struct conn *c) 1755 + { 1756 + if (is_sink) { 1757 + pthread_mutex_lock(&thread_lock); 1758 + open_conns++; 1759 + pthread_mutex_unlock(&thread_lock); 1760 + 1761 + submit_receive(&c->ring, c); 1762 + } else { 1763 + struct io_uring_sqe *sqe; 1764 + int domain; 1765 + 1766 + if (ipv6) 1767 + domain = AF_INET6; 1768 + else 1769 + domain = AF_INET; 1770 + 1771 + /* 1772 + * If fixed_files is set, proxy will use fixed files for any new 1773 + * file descriptors it instantiates. Fixd files, or fixed 1774 + * descriptors, are io_uring private file descriptors. They 1775 + * cannot be accessed outside of io_uring. io_uring holds a 1776 + * fixed reference to them, which means that we do not need to 1777 + * grab per-request references to them. Particularly for 1778 + * threaded applications, grabbing and dropping file references 1779 + * for each operation can be costly as the file table is shared. 1780 + * This generally shows up as fget/fput related overhead in any 1781 + * workload profiles. 1782 + * 1783 + * Fixed descriptors are passed in via the 'fd' field just like 1784 + * regular descriptors, and then marked as such by setting the 1785 + * IOSQE_FIXED_FILE flag in the sqe->flags field. Some helpers 1786 + * do that automatically, like the below, others will need it 1787 + * set manually if they don't have a *direct*() helper. 1788 + * 1789 + * For operations that instantiate them, like the opening of a 1790 + * direct socket, the application may either ask the kernel to 1791 + * find a free one (as is done below), or the application may 1792 + * manage the space itself and pass in an index for a currently 1793 + * free slot in the table. 
If the kernel is asked to allocate a 1794 + * free direct descriptor, note that io_uring does not abide by 1795 + * the POSIX mandated "lowest free must be returned". It may 1796 + * return any free descriptor of its choosing. 1797 + */ 1798 + sqe = get_sqe(&c->ring); 1799 + if (fixed_files) 1800 + io_uring_prep_socket_direct_alloc(sqe, domain, SOCK_STREAM, 0, 0); 1801 + else 1802 + io_uring_prep_socket(sqe, domain, SOCK_STREAM, 0, 0); 1803 + encode_userdata(sqe, c, __SOCK, 0, 0); 1804 + } 1805 + } 1806 + 1807 + /* 1808 + * Start of connection, we got our in descriptor. 1809 + */ 1810 + static int handle_fd_pass(struct io_uring_cqe *cqe) 1811 + { 1812 + struct conn *c = cqe_to_conn(cqe); 1813 + int fd = cqe_to_fd(cqe); 1814 + 1815 + vlog("%d: got fd pass %d\n", c->tid, fd); 1816 + c->in_fd = fd; 1817 + open_socket(c); 1818 + return 0; 1819 + } 1820 + 1821 + static int handle_stop(struct io_uring_cqe *cqe) 1822 + { 1823 + struct conn *c = cqe_to_conn(cqe); 1824 + 1825 + printf("Client %d: queueing shutdown\n", c->tid); 1826 + queue_cancel(&c->ring, c); 1827 + return 0; 1828 + } 1829 + 1830 + /* 1831 + * Called for each CQE that we receive. Decode the request type that it 1832 + * came from, and call the appropriate handler. 1833 + */ 1834 + static int handle_cqe(struct io_uring *ring, struct io_uring_cqe *cqe) 1835 + { 1836 + int ret; 1837 + 1838 + /* 1839 + * Unlikely, but there's an error in this CQE. If an error handler 1840 + * is defined, call it, and that will deal with it. If no error 1841 + * handler is defined, the opcode handler either doesn't care or will 1842 + * handle it on its own. 
1843 + */ 1844 + if (cqe->res < 0) { 1845 + struct error_handler *err = &error_handlers[cqe_to_op(cqe)]; 1846 + 1847 + if (err->error_fn) 1848 + return err->error_fn(err, ring, cqe); 1849 + } 1850 + 1851 + switch (cqe_to_op(cqe)) { 1852 + case __ACCEPT: 1853 + ret = handle_accept(ring, cqe); 1854 + break; 1855 + case __SOCK: 1856 + ret = handle_sock(ring, cqe); 1857 + break; 1858 + case __CONNECT: 1859 + ret = handle_connect(ring, cqe); 1860 + break; 1861 + case __RECV: 1862 + case __RECVMSG: 1863 + ret = handle_recv(ring, cqe); 1864 + break; 1865 + case __SEND: 1866 + case __SENDMSG: 1867 + ret = handle_send(ring, cqe); 1868 + break; 1869 + case __CANCEL: 1870 + ret = handle_cancel(ring, cqe); 1871 + break; 1872 + case __SHUTDOWN: 1873 + ret = handle_shutdown(ring, cqe); 1874 + break; 1875 + case __CLOSE: 1876 + ret = handle_close(ring, cqe); 1877 + break; 1878 + case __FD_PASS: 1879 + ret = handle_fd_pass(cqe); 1880 + break; 1881 + case __STOP: 1882 + ret = handle_stop(cqe); 1883 + break; 1884 + case __NOP: 1885 + ret = 0; 1886 + break; 1887 + default: 1888 + fprintf(stderr, "bad user data %lx\n", (long) cqe->user_data); 1889 + return 1; 1890 + } 1891 + 1892 + return ret; 1893 + } 1894 + 1895 + static void house_keeping(struct io_uring *ring) 1896 + { 1897 + static unsigned long last_bytes; 1898 + unsigned long bytes, elapsed; 1899 + struct conn *c; 1900 + int i, j; 1901 + 1902 + vlog("House keeping entered\n"); 1903 + 1904 + bytes = 0; 1905 + for (i = 0; i < nr_conns; i++) { 1906 + c = &conns[i]; 1907 + 1908 + for (j = 0; j < 2; j++) { 1909 + struct conn_dir *cd = &c->cd[j]; 1910 + 1911 + bytes += cd->in_bytes + cd->out_bytes; 1912 + } 1913 + if (c->flags & CONN_F_DISCONNECTED) { 1914 + vlog("%d: disconnected\n", i); 1915 + 1916 + if (!(c->flags & CONN_F_REAPED)) { 1917 + void *ret; 1918 + 1919 + pthread_join(c->thread, &ret); 1920 + c->flags |= CONN_F_REAPED; 1921 + } 1922 + continue; 1923 + } 1924 + if (c->flags & CONN_F_DISCONNECTING) 1925 + continue; 1926 + 
1927 + if (should_shutdown(c)) { 1928 + __close_conn(ring, c); 1929 + c->flags |= CONN_F_DISCONNECTING; 1930 + } 1931 + } 1932 + 1933 + elapsed = mtime_since_now(&last_housekeeping); 1934 + if (bytes && elapsed >= 900) { 1935 + unsigned long bw; 1936 + 1937 + bw = (8 * (bytes - last_bytes) / 1000UL) / elapsed; 1938 + if (bw) { 1939 + if (open_conns) 1940 + printf("Bandwidth (threads=%d): %'luMbit\n", open_conns, bw); 1941 + gettimeofday(&last_housekeeping, NULL); 1942 + last_bytes = bytes; 1943 + } 1944 + } 1945 + } 1946 + 1947 + /* 1948 + * Event loop shared between the parent, and the connections. Could be 1949 + * split in two, as they don't handle the same types of events. For the per 1950 + * connection loop, 'c' is valid. For the main loop, it's NULL. 1951 + */ 1952 + static int __event_loop(struct io_uring *ring, struct conn *c) 1953 + { 1954 + struct __kernel_timespec active_ts, idle_ts; 1955 + int flags; 1956 + 1957 + idle_ts.tv_sec = 0; 1958 + idle_ts.tv_nsec = 100000000LL; 1959 + active_ts = idle_ts; 1960 + if (wait_usec > 1000000) { 1961 + active_ts.tv_sec = wait_usec / 1000000; 1962 + wait_usec -= active_ts.tv_sec * 1000000; 1963 + } 1964 + active_ts.tv_nsec = wait_usec * 1000; 1965 + 1966 + gettimeofday(&last_housekeeping, NULL); 1967 + 1968 + flags = 0; 1969 + while (1) { 1970 + struct __kernel_timespec *ts = &idle_ts; 1971 + struct io_uring_cqe *cqe; 1972 + unsigned int head; 1973 + int ret, i, to_wait; 1974 + 1975 + /* 1976 + * If wait_batch is set higher than 1, then we'll wait on 1977 + * that amount of CQEs to be posted each loop. If used with 1978 + * DEFER_TASKRUN, this can provide a substantial reduction 1979 + * in context switch rate as the task isn't woken until the 1980 + * requested number of events can be returned. 1981 + * 1982 + * Can be used with -t to set a wait_usec timeout as well. 
1983 + * For example, if an application can deal with 250 usec 1984 + * of wait latencies, it can set -w8 -t250 which will cause 1985 + * io_uring to return when either 8 events have been received, 1986 + * or if 250 usec of waiting has passed. 1987 + * 1988 + * If we don't have any open connections, wait on just 1 1989 + * always. 1990 + */ 1991 + to_wait = 1; 1992 + if (open_conns && !flags) { 1993 + ts = &active_ts; 1994 + to_wait = wait_batch; 1995 + } 1996 + 1997 + vlog("Submit and wait for %d\n", to_wait); 1998 + ret = io_uring_submit_and_wait_timeout(ring, &cqe, to_wait, ts, NULL); 1999 + 2000 + if (*ring->cq.koverflow) 2001 + printf("overflow %u\n", *ring->cq.koverflow); 2002 + if (*ring->sq.kflags & IORING_SQ_CQ_OVERFLOW) 2003 + printf("saw overflow\n"); 2004 + 2005 + vlog("Submit and wait: %d\n", ret); 2006 + 2007 + i = flags = 0; 2008 + io_uring_for_each_cqe(ring, head, cqe) { 2009 + if (handle_cqe(ring, cqe)) 2010 + return 1; 2011 + flags |= cqe_to_conn(cqe)->flags; 2012 + ++i; 2013 + } 2014 + 2015 + vlog("Handled %d events\n", i); 2016 + 2017 + /* 2018 + * Advance the CQ ring for seen events when we've processed 2019 + * all of them in this loop. This can also be done with 2020 + * io_uring_cqe_seen() in each handler above, which just marks 2021 + * that single CQE as seen. However, it's more efficient to 2022 + * mark a batch as seen when we're done with that batch. 2023 + */ 2024 + if (i) { 2025 + io_uring_cq_advance(ring, i); 2026 + events += i; 2027 + } 2028 + 2029 + event_loops++; 2030 + if (c) { 2031 + if (c->flags & CONN_F_DISCONNECTED) 2032 + break; 2033 + } else { 2034 + house_keeping(ring); 2035 + } 2036 + } 2037 + 2038 + return 0; 2039 + } 2040 + 2041 + /* 2042 + * Main event loop, Submit our multishot accept request, and then just loop 2043 + * around handling incoming connections. 
2044 + */ 2045 + static int parent_loop(struct io_uring *ring, int fd) 2046 + { 2047 + struct io_uring_sqe *sqe; 2048 + 2049 + /* 2050 + * proxy provides a way to use either multishot receive or not, but 2051 + * for accept, we always use multishot. A multishot accept request 2052 + * needs only be armed once, and then it'll trigger a completion and 2053 + * post a CQE whenever a new connection is accepted. No need to do 2054 + * anything else, unless the multishot accept terminates. This happens 2055 + * if it encounters an error. Applications should check for 2056 + * IORING_CQE_F_MORE in cqe->flags - this tells you if more completions 2057 + * are expected from this request or not. Non-multishot never have 2058 + * this set, where multishot will always have this set unless an error 2059 + * occurs. 2060 + */ 2061 + sqe = get_sqe(ring); 2062 + if (fixed_files) 2063 + io_uring_prep_multishot_accept_direct(sqe, fd, NULL, NULL, 0); 2064 + else 2065 + io_uring_prep_multishot_accept(sqe, fd, NULL, NULL, 0); 2066 + __encode_userdata(sqe, 0, __ACCEPT, 0, fd); 2067 + 2068 + return __event_loop(ring, NULL); 2069 + } 2070 + 2071 + static int init_ring(struct io_uring *ring, int nr_files) 2072 + { 2073 + struct io_uring_params params; 2074 + int ret; 2075 + 2076 + /* 2077 + * By default, set us up with a big CQ ring. Not strictly needed 2078 + * here, but it's very important to never overflow the CQ ring. 2079 + * Events will not be dropped if this happens, but it does slow 2080 + * the application down in dealing with overflown events. 2081 + * 2082 + * Set SINGLE_ISSUER, which tells the kernel that only one thread 2083 + * is doing IO submissions. This enables certain optimizations in 2084 + * the kernel. 
2085 + */ 2086 + memset(&params, 0, sizeof(params)); 2087 + params.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_CLAMP; 2088 + params.flags |= IORING_SETUP_CQSIZE; 2089 + params.cq_entries = 1024; 2090 + 2091 + /* 2092 + * If use_huge is set, setup the ring with IORING_SETUP_NO_MMAP. This 2093 + * means that the application allocates the memory for the ring, and 2094 + * the kernel maps it. The alternative is having the kernel allocate 2095 + * the memory, and then liburing will mmap it. But we can't really 2096 + * support huge pages that way. If this fails, then ensure that the 2097 + * system has huge pages set aside upfront. 2098 + */ 2099 + if (use_huge) 2100 + params.flags |= IORING_SETUP_NO_MMAP; 2101 + 2102 + /* 2103 + * DEFER_TASKRUN decouples async event reaping and retrying from 2104 + * regular system calls. If this isn't set, then io_uring uses 2105 + * normal task_work for this. task_work is always being run on any 2106 + * exit to userspace. Real applications do more than just call IO 2107 + * related system calls, and hence we can be running this work way 2108 + * too often. Using DEFER_TASKRUN defers any task_work running to 2109 + * when the application enters the kernel anyway to wait on new 2110 + * events. It's generally the preferred and recommended way to setup 2111 + * a ring. 2112 + */ 2113 + if (defer_tw) { 2114 + params.flags |= IORING_SETUP_DEFER_TASKRUN; 2115 + sqpoll = 0; 2116 + } 2117 + 2118 + /* 2119 + * SQPOLL offloads any request submission and retry operations to a 2120 + * dedicated thread. This enables an application to do IO without 2121 + * ever having to enter the kernel itself. The SQPOLL thread will 2122 + * stay busy as long as there's work to do, and go to sleep if 2123 + * sq_thread_idle msecs have passed. If it's running, submitting new 2124 + * IO just needs to make them visible to the SQPOLL thread, it needs 2125 + * not enter the kernel. 
For submission, the application will only 2126 + * enter the kernel if the SQPOLL has been idle long enough that it 2127 + * has gone to sleep. 2128 + * 2129 + * Waiting on events still need to enter the kernel, if none are 2130 + * available. The application may also use io_uring_peek_cqe() to 2131 + * check for new events without entering the kernel, as completions 2132 + * will be continually produced to the CQ ring by the SQPOLL thread 2133 + * as they occur. 2134 + */ 2135 + if (sqpoll) { 2136 + params.flags |= IORING_SETUP_SQPOLL; 2137 + params.sq_thread_idle = 1000; 2138 + defer_tw = 0; 2139 + } 2140 + 2141 + /* 2142 + * If neither DEFER_TASKRUN or SQPOLL is used, set COOP_TASKRUN. This 2143 + * avoids heavy signal based notifications, which can force an 2144 + * application to enter the kernel and process it as soon as they 2145 + * occur. 2146 + */ 2147 + if (!sqpoll && !defer_tw) 2148 + params.flags |= IORING_SETUP_COOP_TASKRUN; 2149 + 2150 + /* 2151 + * The SQ ring size need not be larger than any batch of requests 2152 + * that need to be prepared before submit. Normally in a loop we'd 2153 + * only need a few, if any, particularly if multishot is used. 2154 + */ 2155 + ret = io_uring_queue_init_params(ring_size, ring, &params); 2156 + if (ret) { 2157 + fprintf(stderr, "%s\n", strerror(-ret)); 2158 + return 1; 2159 + } 2160 + 2161 + /* 2162 + * If send serialization is available and no option was given to use 2163 + * it or not, default it to on. If it was turned on and the kernel 2164 + * doesn't support it, turn it off. 
2165 + */ 2166 + if (params.features & IORING_FEAT_SEND_BUF_SELECT) { 2167 + if (send_ring == -1) 2168 + send_ring = 1; 2169 + } else { 2170 + if (send_ring == 1) { 2171 + fprintf(stderr, "Kernel doesn't support ring provided " 2172 + "buffers for sends, disabled\n"); 2173 + } 2174 + send_ring = 0; 2175 + } 2176 + 2177 + if (!send_ring && snd_bundle) { 2178 + fprintf(stderr, "Can't use send bundle without send_ring\n"); 2179 + snd_bundle = 0; 2180 + } 2181 + 2182 + if (fixed_files) { 2183 + /* 2184 + * If fixed files are used, we need to allocate a fixed file 2185 + * table upfront where new direct descriptors can be managed. 2186 + */ 2187 + ret = io_uring_register_files_sparse(ring, nr_files); 2188 + if (ret) { 2189 + fprintf(stderr, "file register: %d\n", ret); 2190 + return 1; 2191 + } 2192 + 2193 + /* 2194 + * If fixed files are used, we also register the ring fd. See 2195 + * comment near io_uring_prep_socket_direct_alloc() further 2196 + * down. This avoids the fget/fput overhead associated with 2197 + * the io_uring_enter(2) system call itself, which is used to 2198 + * submit and wait on events. 2199 + */ 2200 + ret = io_uring_register_ring_fd(ring); 2201 + if (ret != 1) { 2202 + fprintf(stderr, "ring register: %d\n", ret); 2203 + return 1; 2204 + } 2205 + } 2206 + 2207 + if (napi) { 2208 + struct io_uring_napi n = { 2209 + .prefer_busy_poll = napi > 1 ? 
1 : 0, 2210 + .busy_poll_to = napi_timeout, 2211 + }; 2212 + 2213 + ret = io_uring_register_napi(ring, &n); 2214 + if (ret) { 2215 + fprintf(stderr, "io_uring_register_napi: %d\n", ret); 2216 + if (ret != -EINVAL) 2217 + return 1; 2218 + fprintf(stderr, "NAPI not available, turned off\n"); 2219 + } 2220 + } 2221 + 2222 + return 0; 2223 + } 2224 + 2225 + static void *thread_main(void *data) 2226 + { 2227 + struct conn *c = data; 2228 + int ret; 2229 + 2230 + c->flags |= CONN_F_STARTED; 2231 + 2232 + /* we need a max of 4 descriptors for each client */ 2233 + ret = init_ring(&c->ring, 4); 2234 + if (ret) 2235 + goto done; 2236 + 2237 + if (setup_buffer_rings(&c->ring, c)) 2238 + goto done; 2239 + 2240 + /* 2241 + * If we're using fixed files, then we need to wait for the parent 2242 + * to install the c->in_fd into our direct descriptor table. When 2243 + * that happens, we'll set things up. If we're not using fixed files, 2244 + * we can set up the receive or connect now. 2245 + */ 2246 + if (!fixed_files) 2247 + open_socket(c); 2248 + 2249 + /* we're ready */ 2250 + pthread_barrier_wait(&c->startup_barrier); 2251 + 2252 + __event_loop(&c->ring, c); 2253 + done: 2254 + return NULL; 2255 + } 2256 + 2257 + static void usage(const char *name) 2258 + { 2259 + printf("%s:\n", name); 2260 + printf("\t-m:\t\tUse multishot receive (%d)\n", recv_mshot); 2261 + printf("\t-d:\t\tUse DEFER_TASKRUN (%d)\n", defer_tw); 2262 + printf("\t-S:\t\tUse SQPOLL (%d)\n", sqpoll); 2263 + printf("\t-f:\t\tUse only fixed files (%d)\n", fixed_files); 2264 + printf("\t-a:\t\tUse huge pages for the ring (%d)\n", use_huge); 2265 + printf("\t-t:\t\tTimeout for waiting on CQEs (usec) (%d)\n", wait_usec); 2266 + printf("\t-w:\t\tNumber of CQEs to wait for each loop (%d)\n", wait_batch); 2267 + printf("\t-B:\t\tUse bi-directional mode (%d)\n", bidi); 2268 + printf("\t-s:\t\tAct only as a sink (%d)\n", is_sink); 2269 + printf("\t-q:\t\tRing size to use (%d)\n", ring_size); 2270 + 
printf("\t-H:\t\tHost to connect to (%s)\n", host); 2271 + printf("\t-r:\t\tPort to receive on (%d)\n", receive_port); 2272 + printf("\t-p:\t\tPort to connect to (%d)\n", send_port); 2273 + printf("\t-6:\t\tUse IPv6 (%d)\n", ipv6); 2274 + printf("\t-N:\t\tUse NAPI polling (%d)\n", napi); 2275 + printf("\t-T:\t\tNAPI timeout (usec) (%d)\n", napi_timeout); 2276 + printf("\t-b:\t\tSend/receive buf size (%d)\n", buf_size); 2277 + printf("\t-n:\t\tNumber of provided buffers (pow2) (%d)\n", nr_bufs); 2278 + printf("\t-u:\t\tUse provided buffers for send (%d)\n", send_ring); 2279 + printf("\t-C:\t\tUse bundles for send (%d)\n", snd_bundle); 2280 + printf("\t-z:\t\tUse zerocopy send (%d)\n", snd_zc); 2281 + printf("\t-c:\t\tUse bundles for recv (%d)\n", snd_bundle); 2282 + printf("\t-M:\t\tUse sendmsg (%d)\n", snd_msg); 2283 + printf("\t-M:\t\tUse recvmsg (%d)\n", rcv_msg); 2284 + printf("\t-x:\t\tShow extended stats (%d)\n", ext_stat); 2285 + printf("\t-V:\t\tIncrease verbosity (%d)\n", verbose); 2286 + } 2287 + 2288 + /* 2289 + * Options parsing the ring / net setup 2290 + */ 2291 + int main(int argc, char *argv[]) 2292 + { 2293 + struct io_uring ring; 2294 + struct sigaction sa = { }; 2295 + const char *optstring; 2296 + int opt, ret, fd; 2297 + 2298 + setlocale(LC_NUMERIC, "en_US"); 2299 + 2300 + page_size = sysconf(_SC_PAGESIZE); 2301 + if (page_size < 0) { 2302 + perror("sysconf(_SC_PAGESIZE)"); 2303 + return 1; 2304 + } 2305 + 2306 + pthread_mutex_init(&thread_lock, NULL); 2307 + 2308 + optstring = "m:d:S:s:b:f:H:r:p:n:B:N:T:w:t:M:R:u:c:C:q:a:x:z:6Vh?"; 2309 + while ((opt = getopt(argc, argv, optstring)) != -1) { 2310 + switch (opt) { 2311 + case 'm': 2312 + recv_mshot = !!atoi(optarg); 2313 + break; 2314 + case 'S': 2315 + sqpoll = !!atoi(optarg); 2316 + break; 2317 + case 'd': 2318 + defer_tw = !!atoi(optarg); 2319 + break; 2320 + case 'b': 2321 + buf_size = atoi(optarg); 2322 + break; 2323 + case 'n': 2324 + nr_bufs = atoi(optarg); 2325 + break; 2326 + case 'u': 
2327 + send_ring = !!atoi(optarg); 2328 + break; 2329 + case 'c': 2330 + rcv_bundle = !!atoi(optarg); 2331 + break; 2332 + case 'C': 2333 + snd_bundle = !!atoi(optarg); 2334 + break; 2335 + case 'w': 2336 + wait_batch = atoi(optarg); 2337 + break; 2338 + case 't': 2339 + wait_usec = atoi(optarg); 2340 + break; 2341 + case 's': 2342 + is_sink = !!atoi(optarg); 2343 + break; 2344 + case 'f': 2345 + fixed_files = !!atoi(optarg); 2346 + break; 2347 + case 'H': 2348 + host = strdup(optarg); 2349 + break; 2350 + case 'r': 2351 + receive_port = atoi(optarg); 2352 + break; 2353 + case 'p': 2354 + send_port = atoi(optarg); 2355 + break; 2356 + case 'B': 2357 + bidi = !!atoi(optarg); 2358 + break; 2359 + case 'N': 2360 + napi = !!atoi(optarg); 2361 + break; 2362 + case 'T': 2363 + napi_timeout = atoi(optarg); 2364 + break; 2365 + case '6': 2366 + ipv6 = true; 2367 + break; 2368 + case 'M': 2369 + snd_msg = !!atoi(optarg); 2370 + break; 2371 + case 'z': 2372 + snd_zc = !!atoi(optarg); 2373 + break; 2374 + case 'R': 2375 + rcv_msg = !!atoi(optarg); 2376 + break; 2377 + case 'q': 2378 + ring_size = atoi(optarg); 2379 + break; 2380 + case 'a': 2381 + use_huge = !!atoi(optarg); 2382 + break; 2383 + case 'x': 2384 + ext_stat = !!atoi(optarg); 2385 + break; 2386 + case 'V': 2387 + verbose++; 2388 + break; 2389 + case 'h': 2390 + default: 2391 + usage(argv[0]); 2392 + return 1; 2393 + } 2394 + } 2395 + 2396 + if (bidi && is_sink) { 2397 + fprintf(stderr, "Can't be both bidi proxy and sink\n"); 2398 + return 1; 2399 + } 2400 + if (snd_msg && sqpoll) { 2401 + fprintf(stderr, "SQPOLL with msg variants disabled\n"); 2402 + snd_msg = 0; 2403 + } 2404 + if (rcv_msg && rcv_bundle) { 2405 + fprintf(stderr, "Can't use bundles with recvmsg\n"); 2406 + rcv_msg = 0; 2407 + } 2408 + if (snd_msg && snd_bundle) { 2409 + fprintf(stderr, "Can't use bundles with sendmsg\n"); 2410 + snd_msg = 0; 2411 + } 2412 + if (snd_msg && send_ring) { 2413 + fprintf(stderr, "Can't use send ring sendmsg\n"); 2414 + 
snd_msg = 0; 2415 + } 2416 + if (snd_zc && (send_ring || snd_bundle)) { 2417 + fprintf(stderr, "Can't use send zc with bundles or ring\n"); 2418 + send_ring = snd_bundle = 0; 2419 + } 2420 + /* 2421 + * For recvmsg w/multishot, we waste some data at the head of the 2422 + * packet every time. Adjust the buffer size to account for that, 2423 + * so we're still handing 'buf_size' actual payload of data. 2424 + */ 2425 + if (rcv_msg && recv_mshot) { 2426 + fprintf(stderr, "Adjusted buf size for recvmsg w/multishot\n"); 2427 + buf_size += sizeof(struct io_uring_recvmsg_out); 2428 + } 2429 + 2430 + br_mask = nr_bufs - 1; 2431 + 2432 + fd = setup_listening_socket(receive_port, ipv6); 2433 + if (is_sink) 2434 + send_port = -1; 2435 + 2436 + if (fd == -1) 2437 + return 1; 2438 + 2439 + atexit(show_stats); 2440 + sa.sa_handler = sig_int; 2441 + sa.sa_flags = SA_RESTART; 2442 + sigaction(SIGINT, &sa, NULL); 2443 + 2444 + ret = init_ring(&ring, MAX_CONNS * 3); 2445 + if (ret) 2446 + return ret; 2447 + 2448 + printf("Backend: sqpoll=%d, defer_tw=%d, fixed_files=%d, " 2449 + "is_sink=%d, buf_size=%d, nr_bufs=%d, host=%s, send_port=%d, " 2450 + "receive_port=%d, napi=%d, napi_timeout=%d, huge_page=%d\n", 2451 + sqpoll, defer_tw, fixed_files, is_sink, 2452 + buf_size, nr_bufs, host, send_port, receive_port, 2453 + napi, napi_timeout, use_huge); 2454 + printf(" recv options: recvmsg=%d, recv_mshot=%d, recv_bundle=%d\n", 2455 + rcv_msg, recv_mshot, rcv_bundle); 2456 + printf(" send options: sendmsg=%d, send_ring=%d, send_bundle=%d, " 2457 + "send_zerocopy=%d\n", snd_msg, send_ring, snd_bundle, 2458 + snd_zc); 2459 + 2460 + return parent_loop(&ring, fd); 2461 + }

+102

vendor/liburing/examples/proxy.h

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + #ifndef LIBURING_PROXY_H 3 + #define LIBURING_PROXY_H 4 + 5 + #include <sys/time.h> 6 + 7 + /* 8 + * Generic opcode agnostic encoding to sqe/cqe->user_data 9 + */ 10 + struct userdata { 11 + union { 12 + struct { 13 + uint16_t op_tid; /* 4 bits op, 12 bits tid */ 14 + uint16_t bid; 15 + uint16_t fd; 16 + }; 17 + uint64_t val; 18 + }; 19 + }; 20 + 21 + #define OP_SHIFT (12) 22 + #define TID_MASK ((1U << 12) - 1) 23 + 24 + /* 25 + * Packs the information that we will need at completion time into the 26 + * sqe->user_data field, which is passed back in the completion in 27 + * cqe->user_data. Some apps would need more space than this, and in fact 28 + * I'd love to pack the requested IO size in here, and it's not uncommon to 29 + * see apps use this field as just a cookie to either index a data structure 30 + * at completion time, or even just put the pointer to the associated 31 + * structure into this field. 32 + */ 33 + static inline void __encode_userdata(struct io_uring_sqe *sqe, int tid, int op, 34 + int bid, int fd) 35 + { 36 + struct userdata ud = { 37 + .op_tid = (op << OP_SHIFT) | tid, 38 + .bid = bid, 39 + .fd = fd 40 + }; 41 + 42 + io_uring_sqe_set_data64(sqe, ud.val); 43 + } 44 + 45 + static inline uint64_t __raw_encode(int tid, int op, int bid, int fd) 46 + { 47 + struct userdata ud = { 48 + .op_tid = (op << OP_SHIFT) | tid, 49 + .bid = bid, 50 + .fd = fd 51 + }; 52 + 53 + return ud.val; 54 + } 55 + 56 + static inline int cqe_to_op(struct io_uring_cqe *cqe) 57 + { 58 + struct userdata ud = { .val = cqe->user_data }; 59 + 60 + return ud.op_tid >> OP_SHIFT; 61 + } 62 + 63 + static inline int cqe_to_bid(struct io_uring_cqe *cqe) 64 + { 65 + struct userdata ud = { .val = cqe->user_data }; 66 + 67 + return ud.bid; 68 + } 69 + 70 + static inline int cqe_to_fd(struct io_uring_cqe *cqe) 71 + { 72 + struct userdata ud = { .val = cqe->user_data }; 73 + 74 + return ud.fd; 75 + } 76 + 77 + static unsigned long long 
mtime_since(const struct timeval *s, 78 + const struct timeval *e) 79 + { 80 + long long sec, usec; 81 + 82 + sec = e->tv_sec - s->tv_sec; 83 + usec = (e->tv_usec - s->tv_usec); 84 + if (sec > 0 && usec < 0) { 85 + sec--; 86 + usec += 1000000; 87 + } 88 + 89 + sec *= 1000; 90 + usec /= 1000; 91 + return sec + usec; 92 + } 93 + 94 + static unsigned long long mtime_since_now(struct timeval *tv) 95 + { 96 + struct timeval end; 97 + 98 + gettimeofday(&end, NULL); 99 + return mtime_since(tv, &end); 100 + } 101 + 102 + #endif

+73 -13

vendor/liburing/examples/send-zerocopy.c

··· 39 39 #include <sys/wait.h> 40 40 #include <sys/mman.h> 41 41 #include <linux/mman.h> 42 + #include <signal.h> 42 43 43 44 #include "liburing.h" 44 45 ··· 52 53 int idx; 53 54 unsigned long long packets; 54 55 unsigned long long bytes; 56 + unsigned long long dt_ms; 55 57 struct sockaddr_storage dst_addr; 56 58 int fd; 57 59 }; ··· 72 74 static int cfg_payload_len; 73 75 static int cfg_port = 8000; 74 76 static int cfg_runtime_ms = 4200; 77 + static bool cfg_rx_poll = false; 75 78 76 79 static socklen_t cfg_alen; 77 80 static char *str_addr = NULL; ··· 81 84 static struct thread_data threads[MAX_THREADS]; 82 85 static pthread_barrier_t barrier; 83 86 87 + static bool should_stop = false; 88 + 89 + static void sigint_handler(__attribute__((__unused__)) int sig) 90 + { 91 + /* kill if should_stop can't unblock threads fast enough */ 92 + if (should_stop) 93 + _exit(-1); 94 + should_stop = true; 95 + } 96 + 84 97 /* 85 98 * Implementation of error(3), prints an error message and exits. 
86 99 */ ··· 119 132 if (cfg_cpu == -1) 120 133 return; 121 134 135 + CPU_ZERO(&mask); 136 + CPU_SET(cfg_cpu, &mask); 122 137 ret = io_uring_register_iowq_aff(ring, 1, &mask); 123 138 if (ret) 124 139 t_error(1, ret, "unabled to set io-wq affinity\n"); ··· 315 330 const int notif_slack = 128; 316 331 struct io_uring ring; 317 332 struct iovec iov; 318 - uint64_t tstop; 333 + uint64_t tstart; 319 334 int i, fd, ret; 320 335 int compl_cqes = 0; 321 336 int ring_flags = IORING_SETUP_COOP_TASKRUN | IORING_SETUP_SINGLE_ISSUER; 337 + unsigned loop = 0; 322 338 323 339 if (cfg_defer_taskrun) 324 340 ring_flags |= IORING_SETUP_DEFER_TASKRUN; ··· 355 371 if (ret) 356 372 t_error(1, ret, "io_uring: buffer registration"); 357 373 374 + if (cfg_rx_poll) { 375 + struct io_uring_sqe *sqe; 376 + 377 + sqe = io_uring_get_sqe(&ring); 378 + io_uring_prep_poll_add(sqe, fd, POLLIN); 379 + 380 + ret = io_uring_submit(&ring); 381 + if (ret != 1) 382 + t_error(1, ret, "submit poll"); 383 + } 384 + 358 385 pthread_barrier_wait(&barrier); 359 386 360 - tstop = gettimeofday_ms() + cfg_runtime_ms; 387 + tstart = gettimeofday_ms(); 361 388 do { 362 389 struct io_uring_sqe *sqe; 363 390 struct io_uring_cqe *cqe; ··· 419 446 } 420 447 io_uring_cqe_seen(&ring, cqe); 421 448 } 422 - } while (gettimeofday_ms() < tstop); 449 + if (should_stop) 450 + break; 451 + } while ((++loop % 16 != 0) || gettimeofday_ms() < tstart + cfg_runtime_ms); 452 + 453 + td->dt_ms = gettimeofday_ms() - tstart; 423 454 424 455 out_fail: 425 456 shutdown(fd, SHUT_RDWR); ··· 435 466 io_uring_queue_exit(&ring); 436 467 } 437 468 438 - 439 469 static void *do_test(void *arg) 440 470 { 441 471 struct thread_data *td = arg; ··· 450 480 451 481 static void usage(const char *filepath) 452 482 { 453 - t_error(1, 0, "Usage: %s [-n<N>] [-z<val>] [-s<payload size>] " 454 - "(-4|-6) [-t<time s>] -D<dst_ip> udp", filepath); 483 + printf("Usage:\t%s <protocol> <ip-version> -D<addr> [options]\n", filepath); 484 + printf("\t%s <protocol> 
<ip-version> -R [options]\n\n", filepath); 485 + 486 + printf(" -4\t\tUse IPv4\n"); 487 + printf(" -6\t\tUse IPv4\n"); 488 + printf(" -D <address>\tDestination address\n"); 489 + printf(" -p <port>\tServer port to listen on/connect to\n"); 490 + printf(" -s <size>\tBytes per request\n"); 491 + printf(" -s <size>\tBytes per request\n"); 492 + printf(" -n <nr>\tNumber of parallel requests\n"); 493 + printf(" -z <mode>\tZerocopy mode, 0 to disable, enabled otherwise\n"); 494 + printf(" -b <mode>\tUse registered buffers\n"); 495 + printf(" -l <mode>\tUse huge pages\n"); 496 + printf(" -d\t\tUse defer taskrun\n"); 497 + printf(" -C <cpu>\tPin to the specified CPU\n"); 498 + printf(" -T <nr>\tNumber of threads to use for sending\n"); 499 + printf(" -R\t\tPlay the server role\n"); 500 + printf(" -t <seconds>\tTime in seconds\n"); 455 501 } 456 502 457 503 static void parse_opts(int argc, char **argv) ··· 463 509 int c; 464 510 char *daddr = NULL; 465 511 466 - if (argc <= 1) 512 + if (argc <= 1) { 467 513 usage(argv[0]); 514 + exit(0); 515 + } 468 516 469 517 cfg_payload_len = max_payload_len; 470 518 471 - while ((c = getopt(argc, argv, "46D:p:s:t:n:z:b:l:dC:T:R")) != -1) { 519 + while ((c = getopt(argc, argv, "46D:p:s:t:n:z:b:l:dC:T:Ry")) != -1) { 472 520 switch (c) { 473 521 case '4': 474 522 if (cfg_family != PF_UNSPEC) ··· 520 568 case 'R': 521 569 cfg_rx = 1; 522 570 break; 571 + case 'y': 572 + cfg_rx_poll = 1; 573 + break; 523 574 } 524 575 } 525 576 ··· 536 587 537 588 int main(int argc, char **argv) 538 589 { 590 + unsigned long long tsum = 0; 539 591 unsigned long long packets = 0, bytes = 0; 540 592 struct thread_data *td; 541 593 const char *cfg_test; ··· 577 629 if (cfg_rx) 578 630 do_setup_rx(cfg_family, cfg_type, 0); 579 631 632 + if (!cfg_rx) 633 + signal(SIGINT, sigint_handler); 634 + 580 635 for (i = 0; i < cfg_nr_threads; i++) 581 636 pthread_create(&threads[i].thread, NULL, 582 637 !cfg_rx ? 
do_test : do_rx, &threads[i]); ··· 586 641 pthread_join(td->thread, &res); 587 642 packets += td->packets; 588 643 bytes += td->bytes; 644 + tsum += td->dt_ms; 589 645 } 646 + tsum = tsum / cfg_nr_threads; 590 647 591 - fprintf(stderr, "packets=%llu (MB=%llu), rps=%llu (MB/s=%llu)\n", 592 - packets, bytes >> 20, 593 - packets / (cfg_runtime_ms / 1000), 594 - (bytes >> 20) / (cfg_runtime_ms / 1000)); 595 - 648 + if (!tsum) { 649 + printf("The run is too short, can't gather stats\n"); 650 + } else { 651 + printf("packets=%llu (MB=%llu), rps=%llu (MB/s=%llu)\n", 652 + packets, bytes >> 20, 653 + packets * 1000 / tsum, 654 + (bytes >> 20) * 1000 / tsum); 655 + } 596 656 pthread_barrier_destroy(&barrier); 597 657 return 0; 598 658 }

+1 -1

vendor/liburing/liburing.pc.in

··· 9 9 URL: https://git.kernel.dk/cgit/liburing/ 10 10 11 11 Libs: -L${libdir} -luring 12 - Cflags: -I${includedir} 12 + Cflags: -I${includedir} -D_GNU_SOURCE

+2 -2

vendor/liburing/liburing.spec

··· 1 1 Name: liburing 2 - Version: 2.4 2 + Version: 2.7 3 3 Release: 1%{?dist} 4 4 Summary: Linux-native io_uring I/O access library 5 5 License: (GPLv2 with exceptions and LGPLv2+) or MIT ··· 27 27 28 28 %build 29 29 %set_build_flags 30 - ./configure --prefix=%{_prefix} --libdir=/%{_libdir} --libdevdir=/%{_libdir} --mandir=%{_mandir} --includedir=%{_includedir} 30 + ./configure --prefix=%{_prefix} --libdir=%{_libdir} --libdevdir=%{_libdir} --mandir=%{_mandir} --includedir=%{_includedir} 31 31 32 32 %make_build 33 33

+5 -3

vendor/liburing/make-debs.sh

··· 23 23 releasedir=$base/$(lsb_release -si)/liburing 24 24 rm -rf $releasedir 25 25 mkdir -p $releasedir 26 + HEAD=$(which head) 27 + DCH=$(which dch) 26 28 27 29 src_dir=$(readlink -e `basename $0`) 28 30 liburing_dir=$(dirname $src_dir) ··· 38 40 git clean -dxf 39 41 40 42 # Change changelog if it's needed 41 - cur_ver=`head -l debian/changelog | sed -n -e 's/.* (\(.*\)) .*/\1/p'` 43 + cur_ver=`$HEAD < debian/changelog | sed -n -e 's/.* (\(.*\)) .*/\1/p'` 42 44 if [ "$cur_ver" != "$version-1" ]; then 43 - dch -D $distro --force-distribution -b -v "$version-1" "new version" 45 + $DCH -D $distro --force-distribution -b -v "$version-1" "new version" 44 46 fi 45 47 46 - # Create tar archieve 48 + # Create tar archive 47 49 cd ../ 48 50 tar cvzf ${outfile}.tar.gz ${outfile} 49 51 ln -s ${outfile}.tar.gz ${orgfile}.orig.tar.gz

+8

vendor/liburing/man/io_uring_buf_ring_add.3

··· 46 46 47 47 .SH RETURN VALUE 48 48 None 49 + .SH NOTES 50 + liburing (or the kernel, for that matter) doesn't care about what buffer ID maps 51 + to what buffer, and in fact when recycling buffers after use, the application is 52 + free to add a different buffer into the same buffer ID location. All that 53 + matters is that the application knows what a given buffer ID in time corresponds 54 + to in terms of virtual memory. There's no liburing or kernel assumption that 55 + these mappings are persistent over time, they can very well be different every 56 + time a given buffer ID is added to the provided buffer ring. 49 57 .SH SEE ALSO 50 58 .BR io_uring_register_buf_ring (3), 51 59 .BR io_uring_buf_ring_mask (3),

+46

vendor/liburing/man/io_uring_buf_ring_available.3

··· 1 + .\" Copyright (C) 2022 Jens Axboe <axboe@kernel.dk> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_buf_ring_available 3 "Jan 11, 2024" "liburing-2.6" "liburing Manual" 6 + .SH NAME 7 + io_uring_buf_ring_available \- return number of unconsumed provided ring buffer entries 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <liburing.h> 11 + .PP 12 + .BI "int io_uring_buf_ring_available(struct io_uring *" ring ", 13 + .BI " struct io_uring_buf_ring *" br ", 14 + .BI " unsigned short " bgid ");" 15 + .fi 16 + .SH DESCRIPTION 17 + .PP 18 + The 19 + .BR io_uring_buf_ring_available (3) 20 + helper returns the number of unconsumed (by the kernel) entries in the 21 + .IR br 22 + provided buffer group belonging to the io_uring 23 + .IR ring 24 + and identified by the buffer group ID 25 + .IR bgid. 26 + 27 + Since the head of the provided buffer ring is only visible to the kernel, it's 28 + impossible to otherwise know how many unconsumed entries exist in the given 29 + provided buffer ring. This function queries the kernel to return that number. 30 + 31 + .SH NOTES 32 + The returned number of entries reflects the amount of unconsumed entries at the 33 + time that it was queried. If inflight IO exists that may consume provided 34 + buffers from this buffer group, then the returned value is inherently racy. 35 + .SH RETURN VALUE 36 + Returns the number of unconsumed entries on success, which may be 0. In case 37 + of error, may return 38 + .BR -ENOENT 39 + if the specified buffer group doesn't exist, or 40 + .BR -EINVAL 41 + if the buffer group isn't of the correct type, or if the kernel doesn't 42 + support this feature. 43 + .SH SEE ALSO 44 + .BR io_uring_register_buf_ring (3), 45 + .BR io_uring_buf_ring_add (3), 46 + .BR io_uring_buf_ring_cq_advance (3)

+6

vendor/liburing/man/io_uring_buf_ring_init.3

··· 23 23 .SH RETURN VALUE 24 24 None 25 25 26 + .SH NOTES 27 + Unless manual setup is needed, it's recommended to use 28 + .BR io_uring_setup_buf_ring (3) 29 + as it provides a simpler way to setup a provided buffer ring. 30 + . 26 31 .SH SEE ALSO 27 32 .BR io_uring_register_buf_ring (3), 33 + .BR io_uring_setup_buf_ring (3), 28 34 .BR io_uring_buf_ring_add (3) 29 35 .BR io_uring_buf_ring_advance (3), 30 36 .BR io_uring_buf_ring_cq_advance (3)

+2 -2

vendor/liburing/man/io_uring_check_version.3

··· 23 23 The 24 24 .BR io_uring_check_version (3) 25 25 function returns 26 - .I true 26 + .I false 27 27 if the liburing library loaded by the dynamic linker is greater-than 28 28 or equal-to the 29 29 .I major ··· 35 35 The 36 36 .BR IO_URING_CHECK_VERSION (3) 37 37 macro returns 38 - .I 1 38 + .I 0 39 39 if the liburing library being compiled against is greater-than or equal-to the 40 40 .I major 41 41 and

+7 -1

vendor/liburing/man/io_uring_cq_has_overflow.3

··· 18 18 function informs the application if CQ entries have overflowed and are waiting to be flushed to 19 19 the CQ ring. For example using 20 20 .BR io_uring_get_events (3) 21 - . 21 + .SH NOTES 22 + Using this function is only valid if the ring has 23 + .B IORING_FEAT_NODROP 24 + set, as it's checking for a flag set by kernels supporting that feature. For 25 + really old kernels that don't support this feature, if CQE overflow is 26 + experienced the CQEs are lost. If that happens, the CQ ring overflow offset 27 + will get incremented. 22 28 .SH RETURN VALUE 23 29 True if there are CQ entries waiting to be flushed to the CQ ring. 24 30 .SH SEE ALSO

+40

vendor/liburing/man/io_uring_enable_rings.3

··· 1 + .\" Copyright (C) 2023 nick black <dankamongmen@gmail.com> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_enable_rings 3 "July 26, 2024" "liburing-2.7" "liburing Manual" 6 + .SH NAME 7 + io_uring_enable_rings \- enable a disabled ring 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <liburing.h> 11 + .PP 12 + .BI "int io_uring_enable_rings(struct io_uring *" ring ");" 13 + .fi 14 + .SH DESCRIPTION 15 + .PP 16 + The 17 + .BR io_uring_enable_rings (3) 18 + function enables a ring after having created it with the 19 + .B IORING_SETUP_R_DISABLED 20 + flag to 21 + .BR io_uring_queue_init (3) 22 + 23 + It is not possible to submit work to such a ring until this 24 + function has been successfully called. 25 + 26 + .SH RETURN VALUE 27 + .BR io_uring_enable_rings (3) 28 + returns 0 on success. It otherwise returns a negative error code. 29 + It does not write to 30 + .BR errno . 31 + 32 + .SH ERRORS 33 + .TP 34 + .B EBADFD 35 + The ring was not disabled. 36 + 37 + .SH SEE ALSO 38 + .BR io_uring_queue_init (3), 39 + .BR io_uring_register (2), 40 + .BR io_uring_setup (2)

+49 -38

vendor/liburing/man/io_uring_enter.2

··· 302 302 request has been terminated and no further events will be generated. This mode 303 303 is available since 5.13. 304 304 305 + This command works like 306 + an async 307 + .BR poll(2) 308 + and the completion event result is the returned mask of events. 309 + .TP 310 + .B IORING_OP_POLL_REMOVE 311 + Remove or update an existing poll request. If found, the 312 + .I res 313 + field of the 314 + .I "struct io_uring_cqe" 315 + will contain 0. If not found, 316 + .I res 317 + will contain 318 + .B -ENOENT, 319 + or 320 + .B -EALREADY 321 + if the poll request was in the process of completing already. 322 + 305 323 If 306 324 .B IORING_POLL_UPDATE_EVENTS 307 325 is set in the SQE ··· 311 329 .I user_data 312 330 field of the original SQE submitted, and this values is passed in the 313 331 .I addr 314 - field of the SQE. This mode is available since 5.13. 315 - 332 + field of the SQE. 316 333 If 317 334 .B IORING_POLL_UPDATE_USER_DATA 318 335 is set in the SQE ··· 321 338 .I user_data 322 339 of an existing poll request based on the value passed in the 323 340 .I off 324 - field. This mode is available since 5.13. 325 - 326 - This command works like 327 - an async 328 - .BR poll(2) 329 - and the completion event result is the returned mask of events. For the 330 - variants that update 331 - .I user_data 332 - or 333 - .I events 334 - , the completion result will be similar to 335 - .B IORING_OP_POLL_REMOVE. 336 - 337 - .TP 338 - .B IORING_OP_POLL_REMOVE 339 - Remove an existing poll request. If found, the 340 - .I res 341 - field of the 342 - .I "struct io_uring_cqe" 343 - will contain 0. If not found, 344 - .I res 345 - will contain 346 - .B -ENOENT, 347 - or 348 - .B -EALREADY 349 - if the poll request was in the process of completing already. 341 + field. Updating an existing poll is available since 5.13. 350 342 351 343 .TP 352 344 .B IORING_OP_EPOLL_CTL ··· 357 349 for details of the system call. 
358 350 .I fd 359 351 holds the file descriptor that represents the epoll instance, 360 - .I addr 352 + .I off 361 353 holds the file descriptor to add, remove or modify, 362 354 .I len 363 355 holds the operation (EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD) to perform and, 364 - .I off 356 + .I addr 365 357 holds a pointer to the 366 - .I epoll_events 358 + .I epoll_event 367 359 structure. Available since 5.6. 368 360 369 361 .TP ··· 494 486 .B IORING_OP_TIMEOUT 495 487 This command will register a timeout operation. The 496 488 .I addr 497 - field must contain a pointer to a struct timespec64 structure, 489 + field must contain a pointer to a struct __kernel_timespec structure, 498 490 .I len 499 - must contain 1 to signify one timespec64 structure, 491 + must contain 1 to signify one __kernel_timespec structure, 500 492 .I timeout_flags 501 493 may contain IORING_TIMEOUT_ABS 502 494 for an absolute timeout value, or 0 for a relative timeout. ··· 567 559 .I addr 568 560 and return values are same as before. 569 561 .I addr2 570 - field must contain a pointer to a struct timespec64 structure. 562 + field must contain a pointer to a struct __kernel_timespec structure. 571 563 .I timeout_flags 572 564 may also contain IORING_TIMEOUT_ABS, in which case the value given is an 573 565 absolute one, not a relative one. ··· 1242 1234 .in 1243 1235 .PP 1244 1236 1237 + .TP 1238 + .B IORING_OP_WAITID 1239 + Issue the equivalent of a 1240 + .BR waitid(2) 1241 + system call. 1242 + .I len 1243 + must contain the idtype being queried/waited for and 1244 + .I fd 1245 + must contain the 'pid' (or id) being waited for. 1246 + .I file_index 1247 + is the 'options' being set (the child state changes to wait for). 1248 + .I addr2 1249 + is a pointer to siginfo_t, if any, being filled in. See also 1250 + .BR waitid(2) 1251 + for the general description of the related system call. Available since 6.5. 1252 + 1245 1253 .PP 1246 1254 The 1247 1255 .I flags ··· 1274 1282 chain. 
This flag has no effect on previous SQE submissions, nor does it impact 1275 1283 SQEs that are outside of the chain tail. This means that multiple chains can be 1276 1284 executing in parallel, or chains and individual SQEs. Only members inside the 1277 - chain are serialized. A chain of SQEs will be broken, if any request in that 1285 + chain are serialized. A chain of SQEs will be broken if any request in that 1278 1286 chain ends in error. io_uring considers any unexpected result an error. This 1279 1287 means that, eg, a short read will also terminate the remainder of the chain. 1280 1288 If a chain of SQE links is broken, the remaining unstarted part of the chain ··· 1330 1338 1331 1339 The semantics are chosen to accommodate several use cases. First, when all but 1332 1340 the last request of a normal link without linked timeouts are marked with the 1333 - flag, only one CQE per lin is posted. Additionally, it enables suppression of 1341 + flag, only one CQE per link is posted. Additionally, it enables suppression of 1334 1342 CQEs in cases where the side effects of a successfully executed operation is 1335 1343 enough for userspace to know the state of the system. One such example would 1336 1344 be writing to a synchronisation file. ··· 1517 1525 .B IORING_FEAT_NODROP 1518 1526 feature, and there are no otherwise available CQEs. This clears the error state 1519 1527 and so with no other changes the next call to 1520 - .BR io_uring_setup (2) 1528 + .BR io_uring_enter (2) 1521 1529 will not have this error. This error should be extremely rare and indicates the 1522 - machine is running critically low on memory and. It may be reasonable for the 1530 + machine is running critically low on memory. It may be reasonable for the 1523 1531 application to terminate running unless it is able to safely handle any CQE 1524 1532 being lost. 
1525 1533 .TP ··· 1540 1548 occur if the application tries to queue more requests than we have room for in 1541 1549 the CQ ring, or if the application attempts to wait for more events without 1542 1550 having reaped the ones already present in the CQ ring. 1551 + .TP 1552 + .B EEXIST 1553 + The thread submitting the work is invalid. 1543 1554 .TP 1544 1555 .B EINVAL 1545 1556 Some bits in the

+1 -1

vendor/liburing/man/io_uring_free_buf_ring.3

··· 46 46 47 47 .SH RETURN VALUE 48 48 On success 49 - .BR io_uring_register_free_ring (3) 49 + .BR io_uring_free_buf_ring (3) 50 50 returns a pointer to the buffer ring. On failure it returns 51 51 .BR -errno . 52 52 .SH SEE ALSO

+54

vendor/liburing/man/io_uring_prep_bind.3

··· 1 + .\" Copyright (C) 2024 SUSE LLC 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_prep_bind 3 "Jun 3, 2024" "liburing-2.7" "liburing Manual" 6 + .SH NAME 7 + io_uring_prep_bind \- prepare a bind request 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <sys/socket.h> 11 + .B #include <liburing.h> 12 + .PP 13 + .BI "void io_uring_prep_bind(struct io_uring_sqe *" sqe "," 14 + .BI " int " sockfd "," 15 + .BI " struct sockaddr *" addr "," 16 + .BI " socklen_t " addrlen ");" 17 + .fi 18 + .SH DESCRIPTION 19 + The 20 + .BR io_uring_prep_bind (3) 21 + function prepares a bind request. The submission queue entry 22 + .I sqe 23 + is setup to assign the network address at 24 + .IR addr , 25 + of length 26 + .IR addrlen , 27 + to the socket descriptor 28 + .IR sockfd. 29 + 30 + This function prepares an async 31 + .BR bind (2) 32 + request. See that man page for details. 33 + 34 + .SH RETURN VALUE 35 + None 36 + .SH ERRORS 37 + The CQE 38 + .I res 39 + field will contain the result of the operation. See the related man page for 40 + details on possible values. Note that where synchronous system calls will return 41 + .B -1 42 + on failure and set 43 + .I errno 44 + to the actual error value, io_uring never uses 45 + .IR errno . 46 + Instead it returns the negated 47 + .I errno 48 + directly in the CQE 49 + .I res 50 + field. 51 + .SH SEE ALSO 52 + .BR io_uring_get_sqe (3), 53 + .BR io_uring_submit (3), 54 + .BR bind (2)

+6

vendor/liburing/man/io_uring_prep_cancel.3

··· 74 74 .BR io_uring_prep_cancel_fd (3) 75 75 sets up. Available since 5.19. 76 76 .TP 77 + .B IORING_ASYNC_CANCEL_FD_FIXED 78 + Set in conjunction with 79 + .B IORING_ASYNC_CANCEL_FD , 80 + indicating that the file descriptor given is a direct descriptor rather than 81 + a normal file descriptor. Available since 6.0. 82 + .TP 77 83 .B IORING_ASYNC_CANCEL_ANY 78 84 Match any request in the ring, regardless of user_data or file descriptor. 79 85 Can be used to cancel any pending request in the ring. Available since 5.19.

+1

vendor/liburing/man/io_uring_prep_cancel_fd.3

··· 1 + io_uring_prep_cancel.3

+123

vendor/liburing/man/io_uring_prep_cmd.3

··· 1 + .\" Copyright (C) 2023 Breno Leitao <leitao@debian.org> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_prep_cmd 3 "July 27, 2023" "liburing-2.5" "liburing Manual" 6 + .SH NAME 7 + io_uring_prep_cmd_sock \- prepare a command request for a socket 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <liburing.h> 11 + .PP 12 + .BI "void io_uring_prep_cmd_sock(struct io_uring_sqe *" sqe "," 13 + .BI " int " cmd_op "," 14 + .BI " int " fd "," 15 + .BI " int " level "," 16 + .BI " int " optname "," 17 + .BI " void " *optval "," 18 + .BI " int " optlen ");" 19 + .fi 20 + .SH DESCRIPTION 21 + .PP 22 + The 23 + .BR io_uring_prep_cmd_sock (3) 24 + function prepares a cmd request for a socket. The submission queue entry 25 + .I sqe 26 + is setup to use the socket file descriptor pointed to by 27 + .I fd 28 + to start a command operation defined by 29 + .I cmd_op. 30 + 31 + This is a generic function, and each command has its own individual 32 + .I level, optname, optval 33 + values. The optlen defines the size pointed by 34 + .I optval. 35 + 36 + .SH Available commands 37 + 38 + .TP 39 + .B SOCKET_URING_OP_SIOCINQ 40 + Returns the amount of queued unread data in the receive buffer. 41 + The socket must not be in LISTEN state, otherwise an error 42 + .B -EINVAL 43 + is returned in the CQE 44 + .I res 45 + field. 46 + The following arguments are not used for this command 47 + .I level, optname, optval 48 + and 49 + .I optlen. 50 + 51 + Negative return value means an error. 52 + 53 + For more information about this command, please check 54 + .BR unix(7). 55 + 56 + 57 + .TP 58 + .B SOCKET_URING_OP_SIOCOUTQ 59 + Returns the amount of unsent data in the socket send queue. 60 + The socket must not be in LISTEN state, otherwise an error 61 + .B -EINVAL 62 + is returned in the CQE 63 + .I res 64 + field. 65 + The following arguments are not used for this command 66 + .I level, optname, optval 67 + and 68 + .I optlen. 
69 + 70 + Negative return value means an error. 71 + 72 + For more information about this command, please check 73 + .BR unix(7). 74 + 75 + .TP 76 + .B SOCKET_URING_OP_GETSOCKOPT 77 + Command to get options for the socket referred to by the socket file descriptor 78 + .I fd. 79 + The arguments are similar to the 80 + .BR getsockopt(2) 81 + system call. 82 + 83 + The 84 + .BR SOCKET_URING_OP_GETSOCKOPT 85 + command is limited to 86 + .BR SOL_SOCKET 87 + .I level. 88 + 89 + Differently from the 90 + .BR getsockopt(2) 91 + system call, the updated 92 + .I optlen 93 + value is returned in the CQE 94 + .I res 95 + field, on success. On failure, the CQE 96 + .I res 97 + contains a negative error number. 98 + 99 + .TP 100 + .B SOCKET_URING_OP_SETSOCKOPT 101 + Command to set options for the socket referred to by the socket file descriptor 102 + .I fd. 103 + The arguments are similar to the 104 + .BR setsockopt(2) 105 + system call. 106 + 107 + .SH NOTES 108 + The memory block pointed by 109 + .I optval 110 + needs to be valid/live until the CQE returns. 111 + 112 + .SH RETURN VALUE 113 + Dependent on the command. 114 + 115 + .SH ERRORS 116 + The CQE 117 + .I res 118 + field will contain the result of the operation. 119 + .SH SEE ALSO 120 + .BR io_uring_get_sqe (3), 121 + .BR io_uring_submit (3), 122 + .BR io_uring_register (2), 123 + .BR unix (7)

+18 -1

vendor/liburing/man/io_uring_prep_fadvise.3

··· 13 13 .BI "void io_uring_prep_fadvise(struct io_uring_sqe *" sqe "," 14 14 .BI " int " fd "," 15 15 .BI " __u64 " offset "," 16 - .BI " off_t " len "," 16 + .BI " __u32 " len "," 17 17 .BI " int " advice ");" 18 + .BI " 19 + .BI "void io_uring_prep_fadvise64(struct io_uring_sqe *" sqe "," 20 + .BI " int " fd "," 21 + .BI " __u64 " offset "," 22 + .BI " __u64 " len "," 23 + .BI " int " advice ");" 18 24 .fi 19 25 .SH DESCRIPTION 20 26 .PP ··· 30 36 .I len 31 37 length in bytes, giving it the advice located in 32 38 .IR advice . 39 + 40 + The 41 + .BR io_uring_prep_fadvise64 (3) 42 + function works like 43 + .BR io_uring_prep_fadvise (3) 44 + except that it takes a 64-bit length rather than just a 32-bit one. Older 45 + kernels may not support the 64-bit length variant. If this variant is 46 + used on a kernel that doesn't support 64-bit lengths, then the request will get 47 + errored with 48 + .B -EINVAL 49 + in the results field of the CQE. 33 50 34 51 This function prepares an async 35 52 .BR posix_fadvise (2)

+1

vendor/liburing/man/io_uring_prep_fadvise64.3

··· 1 + io_uring_prep_fadvise.3

+70

vendor/liburing/man/io_uring_prep_fixed_fd_install.3

··· 1 + .\" Copyright (C) 2023 Jens Axboe <axboe@kernel.dk> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_prep_fixed_fd_install 3 "December 8, 2023" "liburing-2.6" "liburing Manual" 6 + .SH NAME 7 + io_uring_prep_fixed_fd_install \- prepare fixed file fd installation request 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <liburing.h> 11 + .PP 12 + .BI "void io_uring_prep_fixed_fd_install(struct io_uring_sqe *" sqe "," 13 + .BI " int " fd "," 14 + .BI " unsigned int " flags ");" 15 + .fi 16 + .SH DESCRIPTION 17 + .PP 18 + The 19 + .BR io_uring_prep_fixed_fd_install (3) 20 + helper prepares a fixed file descriptor installation. The submission queue entry 21 + .I sqe 22 + is setup to install the direct/fixed file descriptor 23 + .I fd 24 + with the specified 25 + .I flags 26 + file installation flags. 27 + 28 + One use case of direct/fixed file descriptors is to turn a regular file 29 + descriptor into a direct one, reducing the overhead of any request that 30 + needs to access this file. This helper provides a way to go the other way, 31 + turning a direct descriptor into a regular file descriptor that can then 32 + subsequently be used by regular system calls that take a normal file descriptor. 33 + This can be handy if no regular file descriptor exists for this direct 34 + descriptor. Either because it was instantiated directly as a fixed descriptor, 35 + or because the regular file was closed with 36 + .BR close (2) 37 + after being turned into a direct descriptor. 38 + 39 + Upon successful return of this request, both a normal and fixed file descriptor 40 + exists for the same file. Either one of them may be used to access the file. 41 + Either one of them may be closed without affecting the other one. 42 + 43 + .I flags 44 + may be either zero, or set to 45 + .B IORING_FIXED_FD_NO_CLOEXEC 46 + to indicate that the new regular file descriptor should not be closed during 47 + exec. 
By default, 48 + .B O_CLOEXEC 49 + will be set on the new descriptor otherwise. Setting this field to anything but 50 + those two values will result in the request being failed with 51 + .B -EINVAL 52 + in the CQE 53 + .I res 54 + field. 55 + 56 + .SH RETURN VALUE 57 + None 58 + .SH ERRORS 59 + The CQE 60 + .I res 61 + field will contain the result of the operation, which in this case will be the 62 + value of the new regular file descriptor. In case of failure, a negative value 63 + is returned. 64 + .SH SEE ALSO 65 + .BR io_uring_get_sqe (3), 66 + .BR io_uring_submit (3), 67 + .BR io_uring_register_files (3), 68 + .BR io_uring_unregister_files (3), 69 + .BR io_uring_prep_close_direct (3), 70 + .BR io_uring_prep_openat_direct (3)

+48

vendor/liburing/man/io_uring_prep_ftruncate.3

··· 1 + .\" Copyright (C) 2024 Tony Solomonik <tony.solomonik@gmail.com> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_prep_ftruncate 3 "January 23, 2024" "liburing-2.6" "liburing Manual" 6 + .SH NAME 7 + io_uring_prep_ftruncate \- prepare an ftruncate request 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <liburing.h> 11 + .PP 12 + .BI "void io_uring_prep_ftruncate(struct io_uring_sqe *" sqe "," 13 + .BI " int " fd "," 14 + .BI " loff_t " len ");" 15 + .fi 16 + .SH DESCRIPTION 17 + .PP 18 + The 19 + .BR io_uring_prep_ftruncate (3) 20 + function prepares an ftruncate request. The submission queue entry 21 + .I sqe 22 + is setup to use the file descriptor 23 + .I fd 24 + that should get truncated to the length indicated by the 25 + .I len 26 + argument. 27 + 28 + .SH RETURN VALUE 29 + None 30 + .SH ERRORS 31 + The CQE 32 + .I res 33 + field will contain the result of the operation. See the related man page for 34 + details on possible values. Note that where synchronous system calls will return 35 + .B -1 36 + on failure and set 37 + .I errno 38 + to the actual error value, io_uring never uses 39 + .IR errno . 40 + Instead it returns the negated 41 + .I errno 42 + directly in the CQE 43 + .I res 44 + field. 45 + .SH SEE ALSO 46 + .BR io_uring_get_sqe (3), 47 + .BR io_uring_submit (3), 48 + .BR ftruncate (2)

+92

vendor/liburing/man/io_uring_prep_futex_wait.3

··· 1 + .\" Copyright (C) 2022 Jens Axboe <axboe@kernel.dk> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_prep_futex_wait 3 "Sep 29, 2023" "liburing-2.5" "liburing Manual" 6 + .SH NAME 7 + io_uring_prep_futex_wait \- prepare a futex wait request 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <linux/futex.h> 11 + .B #include <unistd.h> 12 + .B #include <liburing.h> 13 + .PP 14 + .BI "void io_uring_prep_futex_wait(struct io_uring_sqe *" sqe "," 15 + .BI " uint32_t *" futex "," 16 + .BI " uint64_t " val "," 17 + .BI " uint64_t " mask "," 18 + .BI " uint32_t " futex_flags "," 19 + .BI " unsigned int " flags ");" 20 + .fi 21 + .SH DESCRIPTION 22 + .PP 23 + The 24 + .BR io_uring_prep_futex_wait (3) 25 + function prepares a futex wait request. The submission queue entry 26 + .I sqe 27 + is setup for waiting on a futex at address 28 + .I futex 29 + and which still has the value 30 + .I val 31 + and with 32 + .BR futex2 (2) 33 + flags of 34 + .I futex_flags 35 + and io_uring futex flags of 36 + .I flags . 37 + 38 + .I mask 39 + can be set to a specific bitset mask, which will be matched by the waking 40 + side to decide who to wake up. To always get woken, an application may use 41 + .B FUTEX_BITSET_MATCH_ANY . 42 + 43 + .I futex_flags 44 + follows the 45 + .BR futex2 (2) 46 + flags, not the 47 + .BR futex (2) 48 + v1 interface flags. 49 + 50 + .I flags 51 + are currently unused and hence 52 + .B 0 53 + must be passed. 54 + 55 + This function prepares an async 56 + .BR futex (2) 57 + wait request. See that man page for details. Note that the io_uring futex 58 + wait request is similar to the 59 + .B FUTEX_WAIT_BITSET 60 + operation, as 61 + .B FUTEX_WAIT 62 + is a strict subset of that. 63 + 64 + .SH RETURN VALUE 65 + None 66 + .SH ERRORS 67 + The CQE 68 + .I res 69 + field will contain the result of the operation. See the related man page for 70 + details on possible values. 
Note that where synchronous system calls will return 71 + .B -1 72 + on failure and set 73 + .I errno 74 + to the actual error value, io_uring never uses 75 + .IR errno . 76 + Instead it returns the negated 77 + .I errno 78 + directly in the CQE 79 + .I res 80 + field. 81 + .SH NOTES 82 + Unlike the sync futex syscalls that wait on a futex, io_uring does not support 83 + passing in a timeout for the request. Instead, applications are encouraged 84 + to use a linked timeout to abort the futex request at a given time, if desired. 85 + .SH SEE ALSO 86 + .BR io_uring_get_sqe (3), 87 + .BR io_uring_submit (3), 88 + .BR io_uring_prep_futex_waitv (3), 89 + .BR io_uring_prep_futex_wake (3), 90 + .BR io_uring_prep_link_timeout (3), 91 + .BR futex (2) 92 + .BR futex2 (2)

+76

vendor/liburing/man/io_uring_prep_futex_waitv.3

··· 1 + .\" Copyright (C) 2022 Jens Axboe <axboe@kernel.dk> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_prep_futex_waitv 3 "Sep 29, 2023" "liburing-2.5" "liburing Manual" 6 + .SH NAME 7 + io_uring_prep_futex_waitv \- prepare a futex waitv request 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <linux/futex.h> 11 + .B #include <unistd.h> 12 + .B #include <liburing.h> 13 + .PP 14 + .BI "void io_uring_prep_futex_waitv(struct io_uring_sqe *" sqe "," 15 + .BI " struct futex_waitv *" futexv "," 16 + .BI " uint32_t " nr_futex "," 17 + .BI " unsigned int " flags ");" 18 + .fi 19 + .SH DESCRIPTION 20 + .PP 21 + The 22 + .BR io_uring_prep_futex_waitv (3) 23 + function prepares a futex wait request for multiple futexes at the same time. 24 + The submission queue entry 25 + .I sqe 26 + is setup for waiting on all futexes given by 27 + .I futexv 28 + and 29 + .I nr_futex 30 + is the number of futexes in that array. 31 + .I flags 32 + must be set to the io_uring specific futex flags. 33 + 34 + Unlike 35 + .BR io_uring_prep_futex_wait (3), 36 + the desired bitset mask and values are passed in 37 + .IR futexv . 38 + 39 + .I flags 40 + are currently unused and hence 41 + .B 0 42 + must be passed. 43 + 44 + This function prepares an async 45 + .BR futex (2) 46 + waitv request. See that man page for details. 47 + 48 + .SH RETURN VALUE 49 + None 50 + .SH ERRORS 51 + The CQE 52 + .I res 53 + field will contain the result of the operation. See the related man page for 54 + details on possible values. Note that where synchronous system calls will return 55 + .B -1 56 + on failure and set 57 + .I errno 58 + to the actual error value, io_uring never uses 59 + .IR errno . 60 + Instead it returns the negated 61 + .I errno 62 + directly in the CQE 63 + .I res 64 + field. 65 + .SH NOTES 66 + Unlike the sync futex syscalls that wait on a futex, io_uring does not support 67 + passing in a timeout for the request. 
Instead, applications are encouraged 68 + to use a linked timeout to abort the futex request at a given time, if desired. 69 + .SH SEE ALSO 70 + .BR io_uring_get_sqe (3), 71 + .BR io_uring_submit (3), 72 + .BR io_uring_prep_futex_wait (3), 73 + .BR io_uring_prep_futex_wake (3), 74 + .BR io_uring_prep_link_timeout (3), 75 + .BR futex (2), 76 + .BR futex2 (2)

+84

vendor/liburing/man/io_uring_prep_futex_wake.3

··· 1 + .\" Copyright (C) 2022 Jens Axboe <axboe@kernel.dk> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_prep_futex_wake 3 "Sep 29, 2023" "liburing-2.5" "liburing Manual" 6 + .SH NAME 7 + io_uring_prep_futex_wake \- prepare a futex wake request 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <linux/futex.h> 11 + .B #include <unistd.h> 12 + .B #include <liburing.h> 13 + .PP 14 + .BI "void io_uring_prep_futex_wake(struct io_uring_sqe *" sqe "," 15 + .BI " uint32_t *" futex "," 16 + .BI " uint64_t " val "," 17 + .BI " uint64_t " mask "," 18 + .BI " uint32_t " futex_flags "," 19 + .BI " unsigned int " flags ");" 20 + .fi 21 + .SH DESCRIPTION 22 + .PP 23 + The 24 + .BR io_uring_prep_futex_wake (3) 25 + function prepares a futex wake request. The submission queue entry 26 + .I sqe 27 + is setup for waking any waiters on the futex indicated by 28 + .I futex 29 + and at most 30 + .I val 31 + futexes. 32 + .I futex_flags 33 + indicates the 34 + .BR futex2 (2) 35 + modifier flags, and io_uring futex flags of 36 + .I flags . 37 + 38 + If a given bitset for who to wake is desired, then that must be set in 39 + .I mask . 40 + Use 41 + .B FUTEX_BITSET_MATCH_ANY 42 + to match any waiter on the given futex. 43 + 44 + .I flags 45 + are currently unused and hence 46 + .B 0 47 + must be passed. 48 + 49 + This function prepares an async 50 + .BR futex (2) 51 + wake request. See that man page for details. Note that the io_uring futex 52 + wake request is similar to the 53 + .B FUTEX_WAKE_BITSET 54 + operation, as 55 + .B FUTEX_WAKE 56 + is a strict subset of that. 57 + 58 + .SH RETURN VALUE 59 + None 60 + .SH ERRORS 61 + The CQE 62 + .I res 63 + field will contain the result of the operation. On success, the value will be 64 + the index into 65 + .I futexv 66 + which received a wakeup. See the related man page for details on possible 67 + values for errors. 
Note that where synchronous system calls will return 68 + .B -1 69 + on failure and set 70 + .I errno 71 + to the actual error value, io_uring never uses 72 + .IR errno . 73 + Instead it returns the negated 74 + .I errno 75 + directly in the CQE 76 + .I res 77 + field. 78 + .SH SEE ALSO 79 + .BR io_uring_get_sqe (3), 80 + .BR io_uring_submit (3), 81 + .BR io_uring_prep_futex_wait (3), 82 + .BR io_uring_prep_futex_waitv (3), 83 + .BR futex (2), 84 + .BR futex2 (2)

+8 -8

vendor/liburing/man/io_uring_prep_link_timeout.3

··· 15 15 .fi 16 16 .SH DESCRIPTION 17 17 .PP 18 - The 18 + The 19 19 .BR io_uring_prep_link_timeout (3) 20 - function prepares a timeout request for linked sqes. The submission queue entry 20 + function prepares a timeout request for linked sqes. The submission queue entry 21 21 .I sqe 22 22 is setup a timeout specified by 23 23 .IR ts . ··· 58 58 Consider an expired timeout a success in terms of the posted completion. 59 59 .PP 60 60 61 - It is invalid to create a chain (linked sqes) consisting only of a link timeout 62 - request. If all the requests in the chain are completed before timeout, then the 63 - link timeout request gets cancelled. Upon timeout, all the uncompleted requests 64 - in the chain get cancelled. 61 + It is invalid to create a chain (linked sqes) consisting only of a link timeout 62 + request. If all the requests in the chain are completed before timeout, then the 63 + link timeout request gets canceled. Upon timeout, all the uncompleted requests 64 + in the chain get canceled. 65 65 66 66 .SH RETURN VALUE 67 67 None ··· 78 78 The specified timeout occurred and triggered the completion event. 79 79 .TP 80 80 .B -EINVAL 81 - One of the fields set in the SQE was invalid. For example, two clock sources 81 + One of the fields set in the SQE was invalid. For example, two clock sources 82 82 where given, or the specified timeout seconds or nanoseconds where < 0. 83 83 .TP 84 84 .B -EFAULT 85 85 io_uring was unable to access the data specified by ts. 86 86 .TP 87 87 .B -ECANCELED 88 - The timeout was canceled because all submitted requests were completed successfully 88 + The timeout was canceled because all submitted requests were completed successfully 89 89 or one of the requests resulted in failure. 90 90 91 91

+52

vendor/liburing/man/io_uring_prep_listen.3

··· 1 + .\" Copyright (C) 2024 SUSE LLC. 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_prep_listen 3 "Jun 3, 2024" "liburing-2.7" "liburing Manual" 6 + .SH NAME 7 + io_uring_prep_listen \- prepare a listen request 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <sys/socket.h> 11 + .B #include <liburing.h> 12 + .PP 13 + .BI "void io_uring_prep_listen(struct io_uring_sqe *" sqe "," 14 + .BI " int " sockfd "," 15 + .BI " int " backlog ");" 16 + .fi 17 + .SH DESCRIPTION 18 + The 19 + .BR io_uring_prep_listen (3) 20 + function prepares a listen request. The submission queue entry 21 + .I sqe 22 + is setup to place the socket file descriptor specified by 23 + .IR sockfd 24 + into a state to accept incoming connections. The parameter 25 + .I backlog 26 + defines the maximum length of the queue of pending connections. 27 + 28 + This function prepares an async 29 + .BR listen (2) 30 + request. See that man page for details. 31 + 32 + .SH RETURN VALUE 33 + None 34 + .SH ERRORS 35 + The CQE 36 + .I res 37 + field will contain the result of the operation. See the related man page for 38 + details on possible values. Note that where synchronous system calls will return 39 + .B -1 40 + on failure and set 41 + .I errno 42 + to the actual error value, io_uring never uses 43 + .IR errno . 44 + Instead it returns the negated 45 + .I errno 46 + directly in the CQE 47 + .I res 48 + field. 49 + .SH SEE ALSO 50 + .BR io_uring_get_sqe (3), 51 + .BR io_uring_submit (3), 52 + .BR listen (2)

+17 -1

vendor/liburing/man/io_uring_prep_madvise.3

··· 12 12 .PP 13 13 .BI "void io_uring_prep_madvise(struct io_uring_sqe *" sqe "," 14 14 .BI " void *" addr "," 15 - .BI " off_t " len "," 15 + .BI " __u32 " len "," 16 16 .BI " int " advice ");" 17 + .PP 18 + .BI "void io_uring_prep_madvise64(struct io_uring_sqe *" sqe "," 19 + .BI " void *" addr "," 20 + .BI " __u64 " len "," 21 + .BI " int " advice ");" 17 22 .fi 18 23 .SH DESCRIPTION 19 24 .PP ··· 27 32 .I len 28 33 length in bytes, giving it the advice located in 29 34 .IR advice . 35 + 36 + The 37 + .BR io_uring_prep_madvise64 (3) 38 + function works like 39 + .BR io_uring_prep_madvise (3) 40 + except that it takes a 64-bit length rather than just a 32-bit one. Older 41 + kernels may not support the 64-bit length variant. If this variant is 42 + used on a kernel that doesn't support 64-bit lengths, then the request will 43 + fail with 44 + .B -EINVAL 45 + in the results field of the CQE. 30 46 31 47 This function prepares an async 32 48 .BR madvise (2)

+1

vendor/liburing/man/io_uring_prep_madvise64.3

··· 1 + io_uring_prep_madvise.3

+1 -1

vendor/liburing/man/io_uring_prep_poll_add.3

··· 34 34 has triggered, a completion CQE is posted and no more events will be generated 35 35 by the poll request. 36 36 .BR io_uring_prep_poll_multishot (3) 37 - behaves identically in terms of events, but it persist across notifications 37 + behaves identically in terms of events, but it persists across notifications 38 38 and will repeatedly post notifications for the same registration. A CQE 39 39 posted from a multishot poll request will have 40 40 .B IORING_CQE_F_MORE

+13 -1

vendor/liburing/man/io_uring_prep_poll_update.3

··· 40 40 If set, the poll update request will replace the existing events being waited 41 41 for with the ones specified in the 42 42 .I poll_mask 43 - argument to the function. 43 + argument to the function. Note that only the lower 16 bits of events can 44 + be updated. This includes things like 45 + .B EPOLLIN 46 + and 47 + .B EPOLLOUT . 48 + Higher order masks/settings are included as internal state, and cannot be 49 + modified. That includes settings like 50 + .B EPOLLONESHOT , 51 + .B EPOLLEXCLUSIVE , 52 + and 53 + .B EPOLLET . 54 + If an application wishes to modify these, it must cancel/remove the existing 55 + poll request and arm a new one. 44 56 .TP 45 57 .B IORING_POLL_UPDATE_USER_DATA 46 58 If set, the poll update request will update the existing user_data of the

+98

vendor/liburing/man/io_uring_prep_read_multishot.3

··· 1 + .\" Copyright (C) 2023 Jens Axboe <axboe@kernel.dk> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_prep_read_multishot 3 "September 12, 2023" "liburing-2.5" "liburing Manual" 6 + .SH NAME 7 + io_uring_prep_read_multishot \- prepare I/O read multishot request 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <liburing.h> 11 + .PP 12 + .BI "void io_uring_prep_read_multishot(struct io_uring_sqe *" sqe "," 13 + .BI " int " fd "," 14 + .BI " unsigned " nbytes "," 15 + .BI " __u64 " offset "," 16 + .BI " int " buf_group ");" 17 + .fi 18 + .SH DESCRIPTION 19 + .PP 20 + The 21 + .BR io_uring_prep_read_multishot (3) 22 + helper prepares an IO read multishot request. The submission queue entry 23 + .I sqe 24 + is setup to use the file descriptor 25 + .I fd 26 + to start reading 27 + into a buffer from the provided buffer group with ID 28 + .I buf_group 29 + at the specified 30 + .IR offset . 31 + 32 + .I nbytes 33 + must be set to zero, as the size read will be given by the size of the 34 + buffers in the indicated buffer group IO. 35 + 36 + On files that are not capable of seeking, the offset must be 0 or -1. 37 + 38 + If 39 + .I nbytes 40 + exceeds the size of the buffers in the specified buffer group, or if 41 + .I nbytes 42 + is 43 + .B 0 , 44 + then the size of the buffer in that group will be used for the transfer. 45 + 46 + A multishot read request will repeatedly trigger a completion event 47 + whenever data is available to read from the file. Because of that, 48 + this type of request can only be used with a file type that is pollable. 49 + Examples of that include pipes, tun devices, etc. If used with a regular 50 + file, or a wrong file type in general, the request will fail with 51 + .B -EBADFD 52 + in the CQE 53 + .I res 54 + field. 55 + 56 + Since multishot requests repeatedly trigger completion events as data 57 + arrives, it must be used with provided buffers. 
With provided buffers, the 58 + application provides buffers to io_uring upfront, and then the kernel picks 59 + a buffer from the specified group in 60 + .I buf_group 61 + when the request is ready to transfer data. 62 + 63 + A multishot request will persist as long as no errors are encountered 64 + during handling of the request. For each CQE posted on behalf of this request, 65 + the CQE 66 + .I flags 67 + will have 68 + .B IORING_CQE_F_MORE 69 + set if the application should expect more completions from this request. 70 + If this flag isn't set, then that signifies termination of the multishot 71 + read request. 72 + 73 + After the read has been prepared it can be submitted with one of the submit 74 + functions. 75 + 76 + .SH RETURN VALUE 77 + None 78 + .SH ERRORS 79 + The CQE 80 + .I res 81 + field will contain the result of the operation. See the related man page for 82 + details on possible values. Note that where synchronous system calls will return 83 + .B -1 84 + on failure and set 85 + .I errno 86 + to the actual error value, io_uring never uses 87 + .IR errno . 88 + Instead it returns the negated 89 + .I errno 90 + directly in the CQE 91 + .I res 92 + field. 93 + .SH SEE ALSO 94 + .BR io_uring_get_sqe (3), 95 + .BR io_uring_prep_read (3), 96 + .BR io_uring_buf_ring_init (3), 97 + .BR io_uring_buf_ring_add (3), 98 + .BR io_uring_submit (3)

+28

vendor/liburing/man/io_uring_prep_recv.3

··· 80 80 .BR recvmsg (2) 81 81 operation. If set, the socket still had data to be read after the operation 82 82 completed. Both these flags are available since 5.19. 83 + 84 + .TP 85 + .B IORING_RECVSEND_BUNDLE 86 + If set and provided buffers are used with 87 + .BR IOSQE_BUFFER_SELECT , 88 + the receive operation will attempt to fill multiple buffers rather than 89 + just pick a single buffer to fill. To receive multiple buffers in a single 90 + receive, the buffer group ID set in the SQE must be of the ring provided type. 91 + If set, the CQE 92 + .I res 93 + field indicates the total number of bytes received, and the buffer ID returned 94 + in the CQE 95 + .I flags 96 + field indicates the first buffer in the receive operation. The application must 97 + iterate from the indicated initial buffer ID until all 98 + .I res 99 + bytes have been seen to know which is the last buffer in the receive operation. 100 + The buffer IDs consumed will be contiguous from the starting ID, in the order 101 + in which they were added to the buffer ring used. Receiving in bundles can 102 + improve performance when more than one chunk of data is available to receive, 103 + by eliminating redundant round trips through the networking stack. Receive 104 + bundles may be used by both single shot and multishot receive operations. Note 105 + that, internally, bundles rely on the networking stack passing back how much 106 + data is left in the socket after the initial receive. This means that the 107 + initial receive may contain fewer buffers than what is available, with the 108 + followup receive(s) containing more buffers. Available since 6.10. 83 109 .P 84 110 85 111 .SH RETURN VALUE ··· 102 128 .SH SEE ALSO 103 129 .BR io_uring_get_sqe (3), 104 130 .BR io_uring_submit (3), 131 + .BR io_uring_buf_ring_init (3), 132 + .BR io_uring_buf_ring_add (3), 105 133 .BR recv (2)

+2

vendor/liburing/man/io_uring_prep_recvmsg.3

··· 121 121 .SH SEE ALSO 122 122 .BR io_uring_get_sqe (3), 123 123 .BR io_uring_submit (3), 124 + .BR io_uring_buf_ring_init (3), 125 + .BR io_uring_buf_ring_add (3), 124 126 .BR recvmsg (2)

+49

vendor/liburing/man/io_uring_prep_send.3

··· 22 22 .BI " int " flags "," 23 23 .BI " const struct sockaddr *" addr "," 24 24 .BI " socklen_t " addrlen ");" 25 + .PP 26 + .BI "void io_uring_prep_send_bundle(struct io_uring_sqe *" sqe "," 27 + .BI " int " sockfd "," 28 + .BI " size_t " len "," 29 + .BI " int " flags ");" 25 30 .fi 26 31 .SH DESCRIPTION 27 32 .PP ··· 73 78 .BR sendto (2) 74 79 request. See that man page for details. 75 80 81 + Both of the above send variants may be used with provided buffers, where rather 82 + than pass a buffer in directly with the request, 83 + .B IOSQE_BUFFER_SELECT 84 + is set in the SQE 85 + .I flags 86 + field, and additionally a buffer group ID is set in the SQE 87 + .I buf_group 88 + field. By using provided buffers with send requests, the application can 89 + prevent any kind of reordering of the outgoing data which can otherwise 90 + occur if the application has more than one send request inflight for a single 91 + socket. This provides better pipelining of data, where previously the app 92 + needed to manually serialize sends. 93 + 94 + The bundle version allows the application to issue a single send request, 95 + with a buffer group ID given in the SQE 96 + .I buf_group 97 + field, which keeps sending from that buffer group until it runs out of buffers. 98 + As with any other request using provided buffers, 99 + .B IOSQE_BUFFER_SELECT 100 + must also be set in the SQE 101 + .I flags 102 + before submission. Currently 103 + .I len 104 + must be given as 105 + .B 0 106 + otherwise the request will be errored with 107 + .B -EINVAL 108 + as the result code. Future versions may allow setting 109 + .I len 110 + to limit the transfer size. A single CQE is posted for the send, with the result 111 + being how many bytes were sent, on success. When used with provided buffers, 112 + send or send bundle will contain the starting buffer group ID in the CQE 113 + .I flags 114 + field. 
The number of bytes sent starts from there, and will be in contiguous 115 + buffer IDs after that. Send bundle, and send with provided buffers in general, 116 + are available since kernel 6.10, and can be further identified by checking for 117 + the 118 + .B IORING_FEAT_SEND_BUF_SELECT 119 + flag returned when using 120 + .BR io_uring_queue_init_params (3) 121 + to set up the ring. 122 + 76 123 .SH RETURN VALUE 77 124 None 78 125 .SH ERRORS ··· 93 140 .SH SEE ALSO 94 141 .BR io_uring_get_sqe (3), 95 142 .BR io_uring_submit (3), 143 + .BR io_uring_buf_ring_init (3), 144 + .BR io_uring_buf_ring_add (3), 96 145 .BR send (2), 97 146 .BR sendto (2)

+1

vendor/liburing/man/io_uring_prep_send_bundle.3

··· 1 + io_uring_prep_send.3

+2

vendor/liburing/man/io_uring_prep_sendmsg.3

··· 86 86 .SH SEE ALSO 87 87 .BR io_uring_get_sqe (3), 88 88 .BR io_uring_submit (3), 89 + .BR io_uring_buf_ring_init (3), 90 + .BR io_uring_buf_ring_add (3), 89 91 .BR sendmsg (2)

+1 -1

vendor/liburing/man/io_uring_prep_socket.3

··· 57 57 The 58 58 .BR io_uring_prep_socket_direct_alloc (3) 59 59 helper works just like 60 - .BR io_uring_prep_socket_alloc (3), 60 + .BR io_uring_prep_socket_direct (3), 61 61 except it allocates a new direct descriptor rather than pass a free slot in. It 62 62 is equivalent to using 63 63 .BR io_uring_prep_socket_direct (3)

+7 -6

vendor/liburing/man/io_uring_prep_timeout.3

··· 4 4 .\" 5 5 .TH io_uring_prep_poll_timeout 3 "March 12, 2022" "liburing-2.2" "liburing Manual" 6 6 .SH NAME 7 - io_uring_prep_timeoute \- prepare a timeout request 7 + io_uring_prep_timeout \- prepare a timeout request 8 8 .SH SYNOPSIS 9 9 .nf 10 10 .B #include <liburing.h> ··· 45 45 The realtime clock source should be used. 46 46 .TP 47 47 .B IORING_TIMEOUT_ETIME_SUCCESS 48 - Consider an expired timeout a success in terms of the posted completion. 49 - Normally a timeout that triggers would return in a 48 + Consider an expired timeout a success in terms of the posted completion. This 49 + means it will not sever dependent links, as a failed request normally would. The 50 + posted CQE result code will still contain 50 51 .B -ETIME 51 - CQE 52 + in the 52 53 .I res 53 54 value. 54 55 .TP ··· 57 58 IORING_CQE_F_MORE is set if more timeouts are expected. The value specified in 58 59 .I count 59 60 is the number of repeats. A value of 0 means the timeout is indefinite and can 60 - only be stopped by a removal request. 61 + only be stopped by a removal request. Available since the 6.4 kernel. 61 62 .PP 62 63 The timeout completion event will trigger if either the specified timeout 63 64 has occurred, or the specified number of events to wait for have been posted ··· 77 78 .TP 78 79 .B -EINVAL 79 80 One of the fields set in the SQE was invalid. For example, two clocksources 80 - where given, or the specified timeout seconds or nanoseconds where < 0. 81 + were given, or the specified timeout seconds or nanoseconds were < 0. 81 82 .TP 82 83 .B -EFAULT 83 84 io_uring was unable to access the data specified by

+2 -2

vendor/liburing/man/io_uring_prep_timeout_update.3

··· 4 4 .\" 5 5 .TH io_uring_prep_poll_timeout_update 3 "March 12, 2022" "liburing-2.2" "liburing Manual" 6 6 .SH NAME 7 - io_uring_prep_timeoute_update \- prepare a request to update an existing timeout 7 + io_uring_prep_timeout_update \- prepare a request to update an existing timeout 8 8 .SH SYNOPSIS 9 9 .nf 10 10 .B #include <liburing.h> ··· 78 78 .TP 79 79 .B -EINVAL 80 80 One of the fields set in the SQE was invalid. For example, two clocksources 81 - where given, or the specified timeout seconds or nanoseconds where < 0. 81 + were given, or the specified timeout seconds or nanoseconds were < 0. 82 82 .TP 83 83 .B -EFAULT 84 84 io_uring was unable to access the data specified by

+65

vendor/liburing/man/io_uring_prep_waitid.3

··· 1 + .\" Copyright (C) 2022 Jens Axboe <axboe@kernel.dk> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_prep_waitid 3 "July 14, 2023" "liburing-2.5" "liburing Manual" 6 + .SH NAME 7 + io_uring_prep_waitid \- prepare a waitid request 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <sys/wait.h> 11 + .B #include <liburing.h> 12 + .PP 13 + .BI "void io_uring_prep_waitid(struct io_uring_sqe *" sqe "," 14 + .BI " idtype_t " idtype "," 15 + .BI " id_t " id "," 16 + .BI " siginfo_t *" infop "," 17 + .BI " int " options "," 18 + .BI " unsigned int " flags ");" 19 + .fi 20 + .SH DESCRIPTION 21 + .PP 22 + The 23 + .BR io_uring_prep_waitid (3) 24 + function prepares a waitid request. The submission queue entry 25 + .I sqe 26 + is setup to use the 27 + .I idtype 28 + and 29 + .I id 30 + arguments to select the child(ren), and 31 + .I options 32 + to specify the child state changes to wait for. Upon successful 33 + return, it fills 34 + .I infop 35 + with information of the child process, if any. 36 + .I flags 37 + is io_uring specific modifier flags. They are currently unused, and hence 38 + .B 0 39 + should be passed. 40 + 41 + This function prepares an async 42 + .BR waitid (2) 43 + request. See that man page for details. 44 + 45 + .SH RETURN VALUE 46 + None 47 + .SH ERRORS 48 + The CQE 49 + .I res 50 + field will contain the result of the operation. See the related man page for 51 + details on possible values. Note that where synchronous system calls will return 52 + .B -1 53 + on failure and set 54 + .I errno 55 + to the actual error value, io_uring never uses 56 + .IR errno . 57 + Instead it returns the negated 58 + .I errno 59 + directly in the CQE 60 + .I res 61 + field. 62 + .SH SEE ALSO 63 + .BR io_uring_get_sqe (3), 64 + .BR io_uring_submit (3), 65 + .BR waitid (2)

+48 -7

vendor/liburing/man/io_uring_queue_init.3

··· 17 17 .BI "int io_uring_queue_init_params(unsigned " entries "," 18 18 .BI " struct io_uring *" ring "," 19 19 .BI " struct io_uring_params *" params ");" 20 + .PP 21 + .BI "int io_uring_queue_init_mem(unsigned " entries "," 22 + .BI " struct io_uring *" ring "," 23 + .BI " struct io_uring_params *" params "," 24 + .BI " void *" buf ", size_t " buf_size ");" 20 25 .fi 21 26 .SH DESCRIPTION 22 27 .PP ··· 35 40 for the SQ ring. This is adequate for regular file or storage workloads, but 36 41 may be too small for networked workloads. The SQ ring entries do not impose a 37 42 limit on the number of in-flight requests that the ring can support, it merely 38 - limits the number that can be submitted to the kernel in one go (batch). if the 43 + limits the number that can be submitted to the kernel in one go (batch). If the 39 44 CQ ring overflows, e.g. more entries are generated than fits in the ring before 40 - the application can reap them, then the ring enters a CQ ring overflow state. 41 - This is indicated by 45 + the application can reap them, then if the kernel supports 46 + .B IORING_FEAT_NODROP 47 + the ring enters a CQ ring overflow state. Otherwise it drops the CQEs and 48 + increments 49 + .I cq.koverflow 50 + in 51 + .I struct io_uring 52 + with the number of CQEs dropped. The overflow state is indicated by 42 53 .B IORING_SQ_CQ_OVERFLOW 43 54 being set in the SQ ring flags. Unless the kernel runs out of available memory, 44 55 entries are not dropped, but it is a much slower completion path and will slow ··· 65 76 will be passed through to the io_uring_setup syscall (see 66 77 .BR io_uring_setup (2)). 67 78 68 - If the 79 + The 69 80 .BR io_uring_queue_init_params (3) 70 - variant is used, then the parameters indicated by 81 + and 82 + .BR io_uring_queue_init_mem (3) 83 + variants will pass the parameters indicated by 71 84 .I params 72 - will be passed straight through to the 85 + straight through to the 73 86 .BR io_uring_setup (2) 74 87 system call. 
75 88 89 + The 90 + .BR io_uring_queue_init_mem (3) 91 + variant uses the provided 92 + .I buf 93 + with associated size 94 + .I buf_size 95 + as the memory for the ring, using the 96 + .B IORING_SETUP_NO_MMAP 97 + flag to 98 + .BR io_uring_setup (2). 99 + The buffer passed to 100 + .BR io_uring_queue_init_mem (3) 101 + must already be zeroed. 102 + Typically, the caller should allocate a huge page and pass that in to 103 + .BR io_uring_queue_init_mem (3). 104 + Pages allocated by mmap are already zeroed. 105 + .BR io_uring_queue_init_mem (3) 106 + returns the number of bytes used from the provided buffer, so that the app can 107 + reuse the buffer with the returned offset to put more rings in the same huge 108 + page. 109 + 76 110 On success, the resources held by 77 111 .I ring 78 112 should be released via a corresponding call to 79 113 .BR io_uring_queue_exit (3). 80 114 .SH RETURN VALUE 81 115 .BR io_uring_queue_init (3) 82 - returns 0 on success and 116 + and 117 + .BR io_uring_queue_init_params (3) 118 + return 0 on success and 119 + .BR -errno 120 + on failure. 121 + 122 + .BR io_uring_queue_init_mem (3) 123 + returns the number of bytes used from the provided buffer on success, and 83 124 .BR -errno 84 125 on failure. 85 126 .SH SEE ALSO

+1

vendor/liburing/man/io_uring_queue_init_mem.3

··· 1 + io_uring_queue_init.3

+8 -5

vendor/liburing/man/io_uring_register.2

··· 505 505 pointer to an array of two values, with the values in the array being set to 506 506 the maximum count of workers per NUMA node. Index 0 holds the bounded worker 507 507 count, and index 1 holds the unbounded worker count. On successful return, the 508 - passed in array will contain the previous maximum valyes for each type. If the 508 + passed in array will contain the previous maximum values for each type. If the 509 509 count being passed in is 0, then this command returns the current maximum values 510 510 and doesn't modify the current setting. 511 511 .I nr_args ··· 528 528 system call. 529 529 530 530 .I arg 531 - must be set to an unsigned int pointer to an array of type 532 - .I struct io_uring_rsrc_register 531 + must be set to a pointer to an array of type 532 + .I struct io_uring_rsrc_update 533 533 of 534 534 .I nr_args 535 535 number of entries. The ··· 570 570 .B IORING_REGISTER_RING_FDS. 571 571 572 572 .I arg 573 - must be set to an unsigned int pointer to an array of type 574 - .I struct io_uring_rsrc_register 573 + must be set to a pointer to an array of type 574 + .I struct io_uring_rsrc_update 575 575 of 576 576 .I nr_args 577 577 number of entries. Only the ··· 757 757 .B IORING_REGISTER_RESTRICTIONS 758 758 was specified, but there were already buffers, files, or restrictions 759 759 registered. 760 + .TP 761 + .B EEXIST 762 + The thread performing the registration is invalid. 760 763 .TP 761 764 .B EFAULT 762 765 buffer is outside of the process' accessible address space, or

+5

vendor/liburing/man/io_uring_register_buf_ring.3

··· 133 133 .BR io_uring_register_buf_ring (3) 134 134 returns 0. On failure it returns 135 135 .BR -errno . 136 + .SH NOTES 137 + Unless manual setup is needed, it's recommended to use 138 + .BR io_uring_setup_buf_ring (3) 139 + as it provides a simpler way to setup a provided buffer ring. 136 140 .SH SEE ALSO 137 141 .BR io_uring_buf_ring_init (3), 138 142 .BR io_uring_buf_ring_add (3), 143 + .BR io_uring_setup_buf_ring (3), 139 144 .BR io_uring_buf_ring_advance (3), 140 145 .BR io_uring_buf_ring_cq_advance (3)

+8

vendor/liburing/man/io_uring_register_files.3

··· 60 60 .I nr_files 61 61 number of file descriptors. These files must be updated before use, using e.g. 62 62 .BR io_uring_register_files_update_tag (3). 63 + Note that if the size of the sparse table exceeds what 64 + .B RLIMIT_NOFILE 65 + allows, then 66 + .BR io_uring_register_files_sparse (3) 67 + will attempt to raise the limit using 68 + .BR setrlimit (2) 69 + and retry the operation. If the registration fails after doing that, then an 70 + error will be returned. 63 71 The sparse variant is available in kernels 5.19 and later. 64 72 65 73 Registering a file table is a prerequisite for using any request that uses

+40

vendor/liburing/man/io_uring_register_napi.3

··· 1 + .\" Copyright (C) 2022 Stefan Roesch <shr@devkernel.io> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_register_napi 3 "November 16, 2022" "liburing-2.4" "liburing Manual" 6 + .SH NAME 7 + io_uring_register_napi \- register NAPI busy poll settings 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <liburing.h> 11 + .PP 12 + .BI "int io_uring_register_napi(struct io_uring *" ring "," 13 + .BI " struct io_uring_napi *" napi ");" 14 + .PP 15 + .fi 16 + .SH DESCRIPTION 17 + .PP 18 + The 19 + .BR io_uring_register_napi (3) 20 + function registers the NAPI settings for subsequent operations. The NAPI 21 + settings are specified in the structure that is passed in the 22 + .I napi 23 + parameter. The structure consists of the napi timeout 24 + .I busy_poll_to 25 + (napi busy poll timeout in us) and 26 + .IR prefer_busy_poll . 27 + 28 + Registering NAPI settings sets the mode when calling the function 29 + napi_busy_loop and corresponds to the SO_PREFER_BUSY_POLL socket 30 + option. 31 + 32 + NAPI busy poll can reduce the network roundtrip time. 33 + 34 + 35 + .SH RETURN VALUE 36 + On success 37 + .BR io_uring_register_napi (3) 38 + returns 0. On failure it returns 39 + .BR -errno . 40 + It also updates the napi structure with the current values.

+70 -13

vendor/liburing/man/io_uring_setup.2

··· 180 180 If this flag is specified, and if 181 181 .IR entries 182 182 exceeds 183 - .B IORING_MAX_ENTRIES , 183 + .BR IORING_MAX_ENTRIES , 184 184 then 185 185 .IR entries 186 186 will be clamped at 187 - .B IORING_MAX_ENTRIES . 187 + .BR IORING_MAX_ENTRIES . 188 188 If the flag 189 - .BR IORING_SETUP_SQPOLL 189 + .B IORING_SETUP_CQSIZE 190 190 is set, and if the value of 191 191 .IR "struct io_uring_params.cq_entries" 192 192 exceeds 193 - .B IORING_MAX_CQ_ENTRIES , 193 + .BR IORING_MAX_CQ_ENTRIES , 194 194 then it will be clamped at 195 - .B IORING_MAX_CQ_ENTRIES . 195 + .BR IORING_MAX_CQ_ENTRIES . 196 196 .TP 197 197 .B IORING_SETUP_ATTACH_WQ 198 198 This flag should be set in conjunction with ··· 210 210 for details on how to enable the ring. Available since 5.10. 211 211 .TP 212 212 .B IORING_SETUP_SUBMIT_ALL 213 - Normally io_uring stops submitting a batch of request, if one of these requests 213 + Normally io_uring stops submitting a batch of requests, if one of these requests 214 214 results in an error. This can cause submission of less than what is expected, 215 215 if a request ends in error while being submitted. If the ring is created with 216 216 this flag, ··· 300 300 trigger work (for example via any of the CQE waiting functions) or else completions may 301 301 not be delivered. 302 302 Available since 6.1. 303 + .TP 304 + .B IORING_SETUP_NO_MMAP 305 + By default, io_uring allocates kernel memory that callers must subsequently 306 + .BR mmap (2). 307 + If this flag is set, io_uring instead uses caller-allocated buffers; 308 + .I p->cq_off.user_addr 309 + must point to the memory for the sq/cq rings, and 310 + .I p->sq_off.user_addr 311 + must point to the memory for the sqes. 312 + Each allocation must be contiguous memory. 313 + Typically, callers should allocate this memory by using 314 + .BR mmap (2) 315 + to allocate a huge page. 
316 + If this flag is set, a subsequent attempt to 317 + .BR mmap (2) 318 + the io_uring file descriptor will fail. 319 + Available since 6.5. 320 + .TP 321 + .B IORING_SETUP_REGISTERED_FD_ONLY 322 + If this flag is set, io_uring will register the ring file descriptor, and 323 + return the registered descriptor index, without ever allocating an unregistered 324 + file descriptor. The caller will need to use 325 + .B IORING_REGISTER_USE_REGISTERED_RING 326 + when calling 327 + .BR io_uring_register (2). 328 + This flag only makes sense when used alongside 329 + .BR IORING_SETUP_NO_MMAP , 330 + which also needs to be set. 331 + Available since 6.5. 332 + 333 + .TP 334 + .B IORING_SETUP_NO_SQARRAY 335 + If this flag is set, entries in the submission queue will be submitted in order, 336 + wrapping around to the first entry after reaching the end of the queue. In other 337 + words, there will be no more indirection via the array of submission entries, 338 + and the queue will be indexed directly by the submission queue tail and the 339 + range of indices represented by it modulo queue size. Subsequently, the user 340 + should not map the array of submission queue entries, and the corresponding 341 + offset in 342 + .I struct io_sqring_offsets 343 + will be set to zero. Available since 6.6. 344 + 303 345 .PP 304 346 If no flags are specified, the io_uring instance is setup for 305 347 interrupt driven I/O. I/O may be submitted using ··· 323 365 .TP 324 366 .B IORING_FEAT_NODROP 325 367 If this flag is set, io_uring supports almost never dropping completion events. 326 - If a completion event occurs and the CQ ring is full, the kernel stores 327 - the event internally until such a time that the CQ ring has room for more 328 - entries. If this overflow condition is entered, attempting to submit more 329 - IO will fail with the 368 + A dropped event can only occur if the kernel runs out of memory, in which case 369 + you have worse problems than a lost event.
Your application and others will 370 + likely get OOM killed anyway. If a completion event occurs and the CQ ring is 371 + full, the kernel stores the event internally until such a time that the CQ ring 372 + has room for more entries. In earlier kernels, if this overflow condition is 373 + entered, attempting to submit more IO would fail with the 330 374 .B -EBUSY 331 375 error value, if it can't flush the overflown events to the CQ ring. If this 332 376 happens, the application must reap events from the CQ ring and attempt the ··· 410 454 can be used for IO commands without needing registration. Available since 411 455 kernel 5.11. 412 456 .TP 413 - .B IORING_FEAT_ENTER_EXT_ARG 457 + .B IORING_FEAT_EXT_ARG 414 458 If this flag is set, then the 415 459 .BR io_uring_enter (2) 416 460 system call supports passing in an extended argument instead of just the ··· 496 540 __u32 flags; 497 541 __u32 dropped; 498 542 __u32 array; 499 - __u32 resv[3]; 543 + __u32 resv1; 544 + __u64 user_addr; 500 545 }; 501 546 .EE 502 547 .in ··· 592 637 __u32 overflow; 593 638 __u32 cqes; 594 639 __u32 flags; 595 - __u32 resv[3]; 640 + __u32 resv1; 641 + __u64 user_addr; 596 642 }; 597 643 .EE 598 644 .in ··· 647 693 was specified, but 648 694 .I io_uring_params.cq_entries 649 695 was invalid. 696 + .B IORING_SETUP_REGISTERED_FD_ONLY 697 + was specified, but 698 + .B IORING_SETUP_NO_MMAP 699 + was not. 650 700 .TP 651 701 .B EMFILE 652 702 The per-process limit on the number of open file descriptors has been ··· 666 716 .B IORING_SETUP_SQPOLL 667 717 was specified, but the effective user ID of the caller did not have sufficient 668 718 privileges. 719 + .TP 720 + .B EPERM 721 + .I /proc/sys/kernel/io_uring_disabled 722 + has the value 2, or it has the value 1 and the calling process does not hold the 723 + .B CAP_SYS_ADMIN 724 + capability or is not a member of 725 + .I /proc/sys/kernel/io_uring_group. 669 726 .SH SEE ALSO 670 727 .BR io_uring_register (2), 671 728 .BR io_uring_enter (2)

+8 -2

vendor/liburing/man/io_uring_setup_buf_ring.3

··· 62 62 63 63 .SH RETURN VALUE 64 64 On success 65 - .BR io_uring_register_setup_ring (3) 66 - returns a pointer to the buffe ring. On failure it returns 65 + .BR io_uring_setup_buf_ring (3) 66 + returns a pointer to the buffer ring. On failure it returns 67 67 .BR NULL 68 68 and sets 69 69 .I *ret 70 70 to -errno. 71 + .SH NOTES 72 + Note that even if the kernel supports this feature, registering a provided 73 + buffer ring may still fail with 74 + .B -EINVAL 75 + if the host is a 32-bit architecture and the memory being passed in resides in 76 + high memory. 71 77 .SH SEE ALSO 72 78 .BR io_uring_register_buf_ring (3), 73 79 .BR io_uring_buf_ring_init (3),

+6 -1

vendor/liburing/man/io_uring_submit.3

··· 26 26 .SH RETURN VALUE 27 27 On success 28 28 .BR io_uring_submit (3) 29 - returns the number of submitted submission queue entries. On failure it returns 29 + returns the number of submitted submission queue entries, if SQPOLL is not used. 30 + If SQPOLL is used, the return value may report a higher number of submitted 31 + entries than actually submitted. If the user requires accurate information 32 + about how many submission queue entries have been successfully submitted, while 33 + using SQPOLL, the user must fall back to repeatedly submitting a single submission 34 + queue entry. On failure it returns 30 35 .BR -errno . 31 36 .SH NOTES 32 37 For any request that passes in data in a struct, that data must remain

+6

vendor/liburing/man/io_uring_submit_and_wait.3

··· 27 27 and prepares the SQE, it can be submitted with 28 28 .BR io_uring_submit_and_wait (3) . 29 29 30 + Ideally used with a ring setup with 31 + .BR IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN 32 + as that will greatly reduce the number of context switches that an application 33 + will see waiting on multiple requests. 34 + 30 35 .SH RETURN VALUE 31 36 On success 32 37 .BR io_uring_submit_and_wait (3) 33 38 returns the number of submitted submission queue entries. On failure it returns 34 39 .BR -errno . 35 40 .SH SEE ALSO 41 + .BR io_uring_queue_init_params (3), 36 42 .BR io_uring_get_sqe (3), 37 43 .BR io_uring_submit (3), 38 44 .BR io_uring_submit_and_wait_timeout (3)

+21 -3

vendor/liburing/man/io_uring_submit_and_wait_timeout.3

··· 28 28 .I ts 29 29 expires. The completion events are stored in the 30 30 .I cqe_ptr 31 - array. The 31 + array. 32 + .PP 33 + The 32 34 .I sigmask 33 - specifies the set of signals to block. The prevailing signal mask is restored 34 - before returning. 35 + specifies the set of signals to block. If set, it is equivalent to atomically 36 + executing the following calls: 37 + .PP 38 + .in +4n 39 + .EX 40 + sigset_t origmask; 35 41 42 + pthread_sigmask(SIG_SETMASK, &sigmask, &origmask); 43 + ret = io_uring_submit_and_wait_timeout(ring, cqe, wait_nr, ts, NULL); 44 + pthread_sigmask(SIG_SETMASK, &origmask, NULL); 45 + .EE 46 + .in 47 + .PP 36 48 After the caller retrieves a submission queue entry (SQE) with 37 49 .BR io_uring_get_sqe (3) 38 50 and prepares the SQE, it can be submitted with 39 51 .BR io_uring_submit_and_wait_timeout (3) . 40 52 53 + Ideally used with a ring setup with 54 + .BR IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN 55 + as that will greatly reduce the number of context switches that an application 56 + will see waiting on multiple requests. 57 + 41 58 .SH RETURN VALUE 42 59 On success 43 60 .BR io_uring_submit_and_wait_timeout (3) ··· 50 67 .B -ETIME 51 68 is returned in this case. 52 69 .SH SEE ALSO 70 + .BR io_uring_queue_init_params (3), 53 71 .BR io_uring_get_sqe (3), 54 72 .BR io_uring_submit (3), 55 73 .BR io_uring_submit_and_wait (3),

+27

vendor/liburing/man/io_uring_unregister_napi.3

··· 1 + .\" Copyright (C) 2022 Stefan Roesch <shr@devkernel.io> 2 + .\" 3 + .\" SPDX-License-Identifier: LGPL-2.0-or-later 4 + .\" 5 + .TH io_uring_unregister_napi 3 "November 16, 2022" "liburing-2.4" "liburing Manual" 6 + .SH NAME 7 + io_uring_unregister_napi \- unregister NAPI busy poll settings 8 + .SH SYNOPSIS 9 + .nf 10 + .B #include <liburing.h> 11 + .PP 12 + .BI "int io_uring_unregister_napi(struct io_uring *" ring "," 13 + .BI " struct io_uring_napi *" napi) 14 + .PP 15 + .fi 16 + .SH DESCRIPTION 17 + .PP 18 + The 19 + .BR io_uring_unregister_napi (3) 20 + function unregisters the NAPI busy poll settings for subsequent operations. 21 + 22 + .SH RETURN VALUE 23 + On success 24 + .BR io_uring_unregister_napi (3) 25 + returns 0. On failure it returns 26 + .BR -errno . 27 + It also updates the napi structure with the current values.

+6

vendor/liburing/man/io_uring_wait_cqe_nr.3

··· 31 31 the application can retrieve the completion with 32 32 .BR io_uring_wait_cqe (3). 33 33 34 + Ideally used with a ring setup with 35 + .BR IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN 36 + as that will greatly reduce the number of context switches that an application 37 + will see waiting on multiple requests. 38 + 34 39 .SH RETURN VALUE 35 40 On success 36 41 .BR io_uring_wait_cqe_nr (3) ··· 39 44 The return value indicates the result of waiting for a CQE, and it has no 40 45 relation to the CQE result itself. 41 46 .SH SEE ALSO 47 + .BR io_uring_queue_init_params (3), 42 48 .BR io_uring_submit (3), 43 49 .BR io_uring_wait_cqes (3)

+15 -3

vendor/liburing/man/io_uring_wait_cqes.3

··· 25 25 .I ring 26 26 param, waiting for them if necessary or until the timeout 27 27 .I ts 28 - expires. The 28 + expires. 29 + .PP 30 + The 29 31 .I sigmask 30 - specifies the set of signals to block. The prevailing signal mask is restored 31 - before returning. 32 + specifies the set of signals to block. If set, it is equivalent to atomically 33 + executing the following calls: 34 + .PP 35 + .in +4n 36 + .EX 37 + sigset_t origmask; 32 38 39 + pthread_sigmask(SIG_SETMASK, &sigmask, &origmask); 40 + ret = io_uring_wait_cqes(ring, cqe, wait_nr, ts, NULL); 41 + pthread_sigmask(SIG_SETMASK, &origmask, NULL); 42 + .EE 43 + .in 44 + .PP 33 45 The 34 46 .I cqe_ptr 35 47 param is filled in on success with the first CQE. Callers of this function

+22 -5

vendor/liburing/src/Makefile

··· 10 10 override CPPFLAGS += -D_GNU_SOURCE \ 11 11 -Iinclude/ -include ../config-host.h \ 12 12 -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 13 - CFLAGS ?= -g -O3 -Wall -Wextra -fno-stack-protector 13 + CFLAGS ?= -O3 -Wall -Wextra -fno-stack-protector 14 14 override CFLAGS += -Wno-unused-parameter \ 15 15 -DLIBURING_INTERNAL \ 16 16 $(LIBURING_CFLAGS) 17 17 SO_CFLAGS=-fPIC $(CFLAGS) 18 18 L_CFLAGS=$(CFLAGS) 19 - LINK_FLAGS= 19 + LINK_FLAGS=-Wl,-z,defs 20 20 LINK_FLAGS+=$(LDFLAGS) 21 21 ENABLE_SHARED ?= 1 22 22 ··· 45 45 46 46 ifeq ($(CONFIG_NOLIBC),y) 47 47 liburing_srcs += nolibc.c 48 - override CFLAGS += -nostdlib -nodefaultlibs -ffreestanding -fno-builtin 49 - override CPPFLAGS += -nostdlib -nodefaultlibs -ffreestanding -fno-builtin 50 - override LINK_FLAGS += -nostdlib -nodefaultlibs 48 + override CFLAGS += -nostdlib -nodefaultlibs -ffreestanding -fno-builtin -fno-stack-protector 49 + override CPPFLAGS += -nostdlib -nodefaultlibs -ffreestanding -fno-builtin -fno-stack-protector 50 + override LINK_FLAGS += -nostdlib -nodefaultlibs $(libgcc_link_flag) 51 51 endif 52 52 53 53 override CPPFLAGS += -MT "$@" -MMD -MP -MF "$@.d" ··· 99 99 ln -sf $(relativelibdir)$(libname) $(libdevdir)/liburing.so 100 100 ln -sf $(ffi_libname) $(libdir)/$(ffi_soname) 101 101 ln -sf $(relativelibdir)$(ffi_libname) $(libdevdir)/liburing-ffi.so 102 + endif 103 + 104 + uninstall: 105 + @rm -f $(includedir)/liburing/io_uring.h 106 + @rm -f $(includedir)/liburing.h 107 + @rm -f $(includedir)/liburing/compat.h 108 + @rm -f $(includedir)/liburing/barrier.h 109 + @rm -f $(includedir)/liburing/io_uring_version.h 110 + @rm -f $(libdevdir)/liburing.a 111 + @rm -f $(libdevdir)/liburing-ffi.a 112 + ifeq ($(ENABLE_SHARED),1) 113 + @rm -f $(libdir)/$(libname) 114 + @rm -f $(libdir)/$(ffi_libname) 115 + @rm -f $(libdir)/$(soname) 116 + @rm -f $(libdevdir)/liburing.so 117 + @rm -f $(libdir)/$(ffi_soname) 118 + @rm -f $(libdevdir)/liburing-ffi.so 102 119 endif 103 120 104 121 clean:

-1

vendor/liburing/src/arch/aarch64/lib.h

··· 4 4 #define LIBURING_ARCH_AARCH64_LIB_H 5 5 6 6 #include <elf.h> 7 - #include <sys/auxv.h> 8 7 #include "../../syscall.h" 9 8 10 9 static inline long __get_page_size(void)

+48

vendor/liburing/src/arch/riscv64/lib.h

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + 3 + #ifndef LIBURING_ARCH_RISCV64_LIB_H 4 + #define LIBURING_ARCH_RISCV64_LIB_H 5 + 6 + #include <elf.h> 7 + #include <sys/auxv.h> 8 + #include "../../syscall.h" 9 + 10 + static inline long __get_page_size(void) 11 + { 12 + Elf64_Off buf[2]; 13 + long ret = 4096; 14 + int fd; 15 + 16 + fd = __sys_open("/proc/self/auxv", O_RDONLY, 0); 17 + if (fd < 0) 18 + return ret; 19 + 20 + while (1) { 21 + ssize_t x; 22 + 23 + x = __sys_read(fd, buf, sizeof(buf)); 24 + if (x < (long) sizeof(buf)) 25 + break; 26 + 27 + if (buf[0] == AT_PAGESZ) { 28 + ret = buf[1]; 29 + break; 30 + } 31 + } 32 + 33 + __sys_close(fd); 34 + return ret; 35 + } 36 + 37 + static inline long get_page_size(void) 38 + { 39 + static long cache_val; 40 + 41 + if (cache_val) 42 + return cache_val; 43 + 44 + cache_val = __get_page_size(); 45 + return cache_val; 46 + } 47 + 48 + #endif /* #ifndef LIBURING_ARCH_RISCV64_LIB_H */

+100

vendor/liburing/src/arch/riscv64/syscall.h

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + 3 + #ifndef LIBURING_ARCH_RISCV64_SYSCALL_H 4 + #define LIBURING_ARCH_RISCV64_SYSCALL_H 5 + 6 + #if defined(__riscv) && __riscv_xlen == 64 7 + 8 + #define __do_syscallM(...) ({ \ 9 + __asm__ volatile ( \ 10 + "ecall" \ 11 + : "=r"(a0) \ 12 + : __VA_ARGS__ \ 13 + : "memory", "a1"); \ 14 + (long) a0; \ 15 + }) 16 + 17 + #define __do_syscallN(...) ({ \ 18 + __asm__ volatile ( \ 19 + "ecall" \ 20 + : "=r"(a0) \ 21 + : __VA_ARGS__ \ 22 + : "memory"); \ 23 + (long) a0; \ 24 + }) 25 + 26 + #define __do_syscall0(__n) ({ \ 27 + register long a7 __asm__("a7") = __n; \ 28 + register long a0 __asm__("a0"); \ 29 + \ 30 + __do_syscallM("r" (a7)); \ 31 + }) 32 + 33 + #define __do_syscall1(__n, __a) ({ \ 34 + register long a7 __asm__("a7") = __n; \ 35 + register __typeof__(__a) a0 __asm__("a0") = __a; \ 36 + \ 37 + __do_syscallM("r" (a7), "0" (a0)); \ 38 + }) 39 + 40 + #define __do_syscall2(__n, __a, __b) ({ \ 41 + register long a7 __asm__("a7") = __n; \ 42 + register __typeof__(__a) a0 __asm__("a0") = __a; \ 43 + register __typeof__(__b) a1 __asm__("a1") = __b; \ 44 + \ 45 + __do_syscallN("r" (a7), "0" (a0), "r" (a1)); \ 46 + }) 47 + 48 + #define __do_syscall3(__n, __a, __b, __c) ({ \ 49 + register long a7 __asm__("a7") = __n; \ 50 + register __typeof__(__a) a0 __asm__("a0") = __a; \ 51 + register __typeof__(__b) a1 __asm__("a1") = __b; \ 52 + register __typeof__(__c) a2 __asm__("a2") = __c; \ 53 + \ 54 + __do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2)); \ 55 + }) 56 + 57 + #define __do_syscall4(__n, __a, __b, __c, __d) ({ \ 58 + register long a7 __asm__("a7") = __n; \ 59 + register __typeof__(__a) a0 __asm__("a0") = __a; \ 60 + register __typeof__(__b) a1 __asm__("a1") = __b; \ 61 + register __typeof__(__c) a2 __asm__("a2") = __c; \ 62 + register __typeof__(__d) a3 __asm__("a3") = __d; \ 63 + \ 64 + __do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2), "r" (a3));\ 65 + }) 66 + 67 + #define __do_syscall5(__n, __a, __b, __c, __d, 
__e) ({ \ 68 + register long a7 __asm__("a7") = __n; \ 69 + register __typeof__(__a) a0 __asm__("a0") = __a; \ 70 + register __typeof__(__b) a1 __asm__("a1") = __b; \ 71 + register __typeof__(__c) a2 __asm__("a2") = __c; \ 72 + register __typeof__(__d) a3 __asm__("a3") = __d; \ 73 + register __typeof__(__e) a4 __asm__("a4") = __e; \ 74 + \ 75 + __do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2), "r" (a3), \ 76 + "r"(a4)); \ 77 + }) 78 + 79 + #define __do_syscall6(__n, __a, __b, __c, __d, __e, __f) ({ \ 80 + register long a7 __asm__("a7") = __n; \ 81 + register __typeof__(__a) a0 __asm__("a0") = __a; \ 82 + register __typeof__(__b) a1 __asm__("a1") = __b; \ 83 + register __typeof__(__c) a2 __asm__("a2") = __c; \ 84 + register __typeof__(__d) a3 __asm__("a3") = __d; \ 85 + register __typeof__(__e) a4 __asm__("a4") = __e; \ 86 + register __typeof__(__f) a5 __asm__("a5") = __f; \ 87 + \ 88 + __do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2), "r" (a3), \ 89 + "r" (a4), "r"(a5)); \ 90 + }) 91 + 92 + #include "../syscall-defs.h" 93 + 94 + #else /* #if defined(__riscv) && __riscv_xlen == 64 */ 95 + 96 + #include "../generic/syscall.h" 97 + 98 + #endif /* #if defined(__riscv) && __riscv_xlen == 64 */ 99 + 100 + #endif /* #ifndef LIBURING_ARCH_RISCV64_SYSCALL_H */

+164 -22

vendor/liburing/src/include/liburing.h

··· 2 2 #ifndef LIB_URING_H 3 3 #define LIB_URING_H 4 4 5 - #ifndef _XOPEN_SOURCE 6 - #define _XOPEN_SOURCE 500 /* Required for glibc to expose sigset_t */ 7 - #endif 8 - 9 - #ifndef _GNU_SOURCE 10 - #define _GNU_SOURCE /* Required for musl to expose cpu_set_t */ 11 - #endif 12 - 13 5 #include <sys/socket.h> 14 6 #include <sys/stat.h> 15 7 #include <sys/uio.h> ··· 21 13 #include <fcntl.h> 22 14 #include <sched.h> 23 15 #include <linux/swab.h> 16 + #include <sys/wait.h> 24 17 #include "liburing/compat.h" 25 18 #include "liburing/io_uring.h" 26 19 #include "liburing/io_uring_version.h" ··· 164 157 return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0; 165 158 } 166 159 160 + int io_uring_queue_init_mem(unsigned entries, struct io_uring *ring, 161 + struct io_uring_params *p, 162 + void *buf, size_t buf_size); 167 163 int io_uring_queue_init_params(unsigned entries, struct io_uring *ring, 168 164 struct io_uring_params *p); 169 165 int io_uring_queue_init(unsigned entries, struct io_uring *ring, ··· 235 231 int io_uring_register_buf_ring(struct io_uring *ring, 236 232 struct io_uring_buf_reg *reg, unsigned int flags); 237 233 int io_uring_unregister_buf_ring(struct io_uring *ring, int bgid); 234 + int io_uring_buf_ring_head(struct io_uring *ring, int buf_group, uint16_t *head); 238 235 int io_uring_register_sync_cancel(struct io_uring *ring, 239 236 struct io_uring_sync_cancel_reg *reg); 240 237 241 238 int io_uring_register_file_alloc_range(struct io_uring *ring, 242 239 unsigned off, unsigned len); 240 + 241 + int io_uring_register_napi(struct io_uring *ring, struct io_uring_napi *napi); 242 + int io_uring_unregister_napi(struct io_uring *ring, struct io_uring_napi *napi); 243 243 244 244 int io_uring_get_events(struct io_uring *ring); 245 245 int io_uring_submit_and_get_events(struct io_uring *ring); ··· 375 375 sqe->file_index = file_index + 1; 376 376 } 377 377 378 + IOURINGINLINE void io_uring_initialize_sqe(struct io_uring_sqe *sqe) 379 + { 380 + sqe->flags = 
0; 381 + sqe->ioprio = 0; 382 + sqe->rw_flags = 0; 383 + sqe->buf_index = 0; 384 + sqe->personality = 0; 385 + sqe->file_index = 0; 386 + sqe->addr3 = 0; 387 + sqe->__pad2[0] = 0; 388 + } 389 + 378 390 IOURINGINLINE void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, 379 391 const void *addr, unsigned len, 380 392 __u64 offset) 381 393 { 382 394 sqe->opcode = (__u8) op; 383 - sqe->flags = 0; 384 - sqe->ioprio = 0; 385 395 sqe->fd = fd; 386 396 sqe->off = offset; 387 397 sqe->addr = (unsigned long) addr; 388 398 sqe->len = len; 389 - sqe->rw_flags = 0; 390 - sqe->buf_index = 0; 391 - sqe->personality = 0; 392 - sqe->file_index = 0; 393 - sqe->addr3 = 0; 394 - sqe->__pad2[0] = 0; 395 399 } 396 400 397 401 /* ··· 665 669 io_uring_prep_rw(IORING_OP_CONNECT, sqe, fd, addr, 0, addrlen); 666 670 } 667 671 672 + IOURINGINLINE void io_uring_prep_bind(struct io_uring_sqe *sqe, int fd, 673 + struct sockaddr *addr, 674 + socklen_t addrlen) 675 + { 676 + io_uring_prep_rw(IORING_OP_BIND, sqe, fd, addr, 0, addrlen); 677 + } 678 + 679 + IOURINGINLINE void io_uring_prep_listen(struct io_uring_sqe *sqe, int fd, 680 + int backlog) 681 + { 682 + io_uring_prep_rw(IORING_OP_LISTEN, sqe, fd, 0, backlog, 0); 683 + } 684 + 668 685 IOURINGINLINE void io_uring_prep_files_update(struct io_uring_sqe *sqe, 669 686 int *fds, unsigned nr_fds, 670 687 int offset) ··· 720 737 io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset); 721 738 } 722 739 740 + IOURINGINLINE void io_uring_prep_read_multishot(struct io_uring_sqe *sqe, 741 + int fd, unsigned nbytes, 742 + __u64 offset, int buf_group) 743 + { 744 + io_uring_prep_rw(IORING_OP_READ_MULTISHOT, sqe, fd, NULL, nbytes, 745 + offset); 746 + sqe->buf_group = buf_group; 747 + sqe->flags = IOSQE_BUFFER_SELECT; 748 + } 749 + 723 750 IOURINGINLINE void io_uring_prep_write(struct io_uring_sqe *sqe, int fd, 724 751 const void *buf, unsigned nbytes, 725 752 __u64 offset) ··· 738 765 } 739 766 740 767 IOURINGINLINE void 
io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd, 741 - __u64 offset, off_t len, int advice) 768 + __u64 offset, __u32 len, int advice) 742 769 { 743 770 io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, (__u32) len, offset); 744 771 sqe->fadvise_advice = (__u32) advice; 745 772 } 746 773 747 774 IOURINGINLINE void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr, 748 - off_t length, int advice) 775 + __u32 length, int advice) 749 776 { 750 777 io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, (__u32) length, 0); 751 778 sqe->fadvise_advice = (__u32) advice; 752 779 } 753 780 781 + IOURINGINLINE void io_uring_prep_fadvise64(struct io_uring_sqe *sqe, int fd, 782 + __u64 offset, off_t len, int advice) 783 + { 784 + io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, 0, offset); 785 + sqe->addr = len; 786 + sqe->fadvise_advice = (__u32) advice; 787 + } 788 + 789 + IOURINGINLINE void io_uring_prep_madvise64(struct io_uring_sqe *sqe, void *addr, 790 + off_t length, int advice) 791 + { 792 + io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, 0, length); 793 + sqe->fadvise_advice = (__u32) advice; 794 + } 795 + 754 796 IOURINGINLINE void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd, 755 797 const void *buf, size_t len, int flags) 756 798 { 757 799 io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, (__u32) len, 0); 758 800 sqe->msg_flags = (__u32) flags; 801 + } 802 + 803 + IOURINGINLINE void io_uring_prep_send_bundle(struct io_uring_sqe *sqe, 804 + int sockfd, size_t len, int flags) 805 + { 806 + io_uring_prep_send(sqe, sockfd, NULL, len, flags); 807 + sqe->ioprio |= IORING_RECVSEND_BUNDLE; 759 808 } 760 809 761 810 IOURINGINLINE void io_uring_prep_send_set_addr(struct io_uring_sqe *sqe, ··· 1126 1175 } 1127 1176 1128 1177 /* 1178 + * Prepare commands for sockets 1179 + */ 1180 + IOURINGINLINE void io_uring_prep_cmd_sock(struct io_uring_sqe *sqe, 1181 + int cmd_op, 1182 + int fd, 1183 + int level, 1184 + int optname, 1185 + void *optval, 1186 + 
int optlen) 1187 + { 1188 + io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, NULL, 0, 0); 1189 + sqe->optval = (unsigned long) (uintptr_t) optval; 1190 + sqe->optname = optname; 1191 + sqe->optlen = optlen; 1192 + sqe->cmd_op = cmd_op; 1193 + sqe->level = level; 1194 + } 1195 + 1196 + IOURINGINLINE void io_uring_prep_waitid(struct io_uring_sqe *sqe, 1197 + idtype_t idtype, 1198 + id_t id, 1199 + siginfo_t *infop, 1200 + int options, unsigned int flags) 1201 + { 1202 + io_uring_prep_rw(IORING_OP_WAITID, sqe, id, NULL, (unsigned) idtype, 0); 1203 + sqe->waitid_flags = flags; 1204 + sqe->file_index = options; 1205 + sqe->addr2 = (unsigned long) infop; 1206 + } 1207 + 1208 + IOURINGINLINE void io_uring_prep_futex_wake(struct io_uring_sqe *sqe, 1209 + uint32_t *futex, uint64_t val, 1210 + uint64_t mask, uint32_t futex_flags, 1211 + unsigned int flags) 1212 + { 1213 + io_uring_prep_rw(IORING_OP_FUTEX_WAKE, sqe, futex_flags, futex, 0, val); 1214 + sqe->futex_flags = flags; 1215 + sqe->addr3 = mask; 1216 + } 1217 + 1218 + IOURINGINLINE void io_uring_prep_futex_wait(struct io_uring_sqe *sqe, 1219 + uint32_t *futex, uint64_t val, 1220 + uint64_t mask, uint32_t futex_flags, 1221 + unsigned int flags) 1222 + { 1223 + io_uring_prep_rw(IORING_OP_FUTEX_WAIT, sqe, futex_flags, futex, 0, val); 1224 + sqe->futex_flags = flags; 1225 + sqe->addr3 = mask; 1226 + } 1227 + 1228 + struct futex_waitv; 1229 + IOURINGINLINE void io_uring_prep_futex_waitv(struct io_uring_sqe *sqe, 1230 + struct futex_waitv *futex, 1231 + uint32_t nr_futex, 1232 + unsigned int flags) 1233 + { 1234 + io_uring_prep_rw(IORING_OP_FUTEX_WAITV, sqe, 0, futex, nr_futex, 0); 1235 + sqe->futex_flags = flags; 1236 + } 1237 + 1238 + IOURINGINLINE void io_uring_prep_fixed_fd_install(struct io_uring_sqe *sqe, 1239 + int fd, 1240 + unsigned int flags) 1241 + { 1242 + io_uring_prep_rw(IORING_OP_FIXED_FD_INSTALL, sqe, fd, NULL, 0, 0); 1243 + sqe->flags = IOSQE_FIXED_FILE; 1244 + sqe->install_fd_flags = flags; 1245 + } 1246 + 
1247 + IOURINGINLINE void io_uring_prep_ftruncate(struct io_uring_sqe *sqe, 1248 + int fd, loff_t len) 1249 + { 1250 + io_uring_prep_rw(IORING_OP_FTRUNCATE, sqe, fd, 0, 0, len); 1251 + } 1252 + 1253 + /* 1129 1254 * Returns number of unconsumed (if SQPOLL) or unsubmitted entries exist in 1130 1255 * the SQ ring 1131 1256 */ 1132 1257 IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring) 1133 1258 { 1134 - unsigned khead = *ring->sq.khead; 1259 + unsigned khead; 1135 1260 1136 1261 /* 1137 1262 * Without a barrier, we could miss an update and think the SQ wasn't ··· 1140 1265 */ 1141 1266 if (ring->flags & IORING_SETUP_SQPOLL) 1142 1267 khead = io_uring_smp_load_acquire(ring->sq.khead); 1268 + else 1269 + khead = *ring->sq.khead; 1143 1270 1144 1271 /* always use real head, to avoid losing sync for short submit */ 1145 1272 return ring->sq.sqe_tail - khead; ··· 1326 1453 if (ring->flags & IORING_SETUP_SQE128) 1327 1454 shift = 1; 1328 1455 if (!(ring->flags & IORING_SETUP_SQPOLL)) 1329 - head = IO_URING_READ_ONCE(*sq->khead); 1456 + head = *sq->khead; 1330 1457 else 1331 1458 head = io_uring_smp_load_acquire(sq->khead); 1332 1459 ··· 1335 1462 1336 1463 sqe = &sq->sqes[(sq->sqe_tail & sq->ring_mask) << shift]; 1337 1464 sq->sqe_tail = next; 1465 + io_uring_initialize_sqe(sqe); 1338 1466 return sqe; 1339 1467 } 1340 1468 ··· 1386 1514 struct io_uring_buf_ring *br, 1387 1515 int cq_count, int buf_count) 1388 1516 { 1389 - br->tail += buf_count; 1517 + io_uring_buf_ring_advance(br, buf_count); 1390 1518 io_uring_cq_advance(ring, cq_count); 1391 1519 } 1392 1520 ··· 1404 1532 __io_uring_buf_ring_cq_advance(ring, br, count, count); 1405 1533 } 1406 1534 1535 + IOURINGINLINE int io_uring_buf_ring_available(struct io_uring *ring, 1536 + struct io_uring_buf_ring *br, 1537 + unsigned short bgid) 1538 + { 1539 + uint16_t head; 1540 + int ret; 1541 + 1542 + ret = io_uring_buf_ring_head(ring, bgid, &head); 1543 + if (ret) 1544 + return ret; 1545 + 1546 + return 
(uint16_t) (br->tail - head); 1547 + } 1548 + 1407 1549 #ifndef LIBURING_INTERNAL 1408 1550 IOURINGINLINE struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) 1409 1551 { ··· 1432 1574 #define IO_URING_CHECK_VERSION(major,minor) \ 1433 1575 (major > IO_URING_VERSION_MAJOR || \ 1434 1576 (major == IO_URING_VERSION_MAJOR && \ 1435 - minor >= IO_URING_VERSION_MINOR)) 1577 + minor > IO_URING_VERSION_MINOR)) 1436 1578 1437 1579 #ifdef __cplusplus 1438 1580 }

+98 -4

vendor/liburing/src/include/liburing/io_uring.h

··· 43 43 union { 44 44 __u64 addr; /* pointer to buffer or iovecs */ 45 45 __u64 splice_off_in; 46 + struct { 47 + __u32 level; 48 + __u32 optname; 49 + }; 46 50 }; 47 51 __u32 len; /* buffer size or number of iovecs */ 48 52 union { ··· 65 69 __u32 xattr_flags; 66 70 __u32 msg_ring_flags; 67 71 __u32 uring_cmd_flags; 72 + __u32 waitid_flags; 73 + __u32 futex_flags; 74 + __u32 install_fd_flags; 75 + __u32 nop_flags; 68 76 }; 69 77 __u64 user_data; /* data to be passed back at completion time */ 70 78 /* pack this to avoid bogus arm OABI complaints */ ··· 79 87 union { 80 88 __s32 splice_fd_in; 81 89 __u32 file_index; 90 + __u32 optlen; 82 91 struct { 83 92 __u16 addr_len; 84 93 __u16 __pad3[1]; ··· 89 98 __u64 addr3; 90 99 __u64 __pad2[1]; 91 100 }; 101 + __u64 optval; 92 102 /* 93 103 * If the ring is initialized with IORING_SETUP_SQE128, then 94 104 * this field is used for 80 bytes of arbitrary command data ··· 173 183 */ 174 184 #define IORING_SETUP_DEFER_TASKRUN (1U << 13) 175 185 186 + /* 187 + * Application provides ring memory 188 + */ 189 + #define IORING_SETUP_NO_MMAP (1U << 14) 190 + 191 + /* 192 + * Register the ring fd in itself for use with 193 + * IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather 194 + * than an fd. 195 + */ 196 + #define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15) 197 + 198 + /* 199 + * Removes indirection through the SQ index array. 200 + */ 201 + #define IORING_SETUP_NO_SQARRAY (1U << 16) 202 + 176 203 enum io_uring_op { 177 204 IORING_OP_NOP, 178 205 IORING_OP_READV, ··· 223 250 IORING_OP_URING_CMD, 224 251 IORING_OP_SEND_ZC, 225 252 IORING_OP_SENDMSG_ZC, 253 + IORING_OP_READ_MULTISHOT, 254 + IORING_OP_WAITID, 255 + IORING_OP_FUTEX_WAIT, 256 + IORING_OP_FUTEX_WAKE, 257 + IORING_OP_FUTEX_WAITV, 258 + IORING_OP_FIXED_FD_INSTALL, 259 + IORING_OP_FTRUNCATE, 260 + IORING_OP_BIND, 261 + IORING_OP_LISTEN, 226 262 227 263 /* this goes last, obviously */ 228 264 IORING_OP_LAST, ··· 281 317 * ASYNC_CANCEL flags. 
282 318 * 283 319 * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key 284 - * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the 320 + * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the 285 321 * request 'user_data' 286 322 * IORING_ASYNC_CANCEL_ANY Match any request 287 323 * IORING_ASYNC_CANCEL_FD_FIXED 'fd' passed in is a fixed descriptor ··· 313 349 * 0 is reported if zerocopy was actually possible. 314 350 * IORING_NOTIF_USAGE_ZC_COPIED if data was copied 315 351 * (at least partially). 352 + * 353 + * IORING_RECVSEND_BUNDLE Used with IOSQE_BUFFER_SELECT. If set, send will 354 + * grab as many buffers from the buffer group ID 355 + * given and send them all. The completion result 356 + * will be the number of buffers sent, with the 357 + * starting buffer ID in cqe->flags as per usual 358 + * for provided buffer usage. The buffers will be 359 + * contiguous from the starting buffer ID. 316 360 */ 317 361 #define IORING_RECVSEND_POLL_FIRST (1U << 0) 318 362 #define IORING_RECV_MULTISHOT (1U << 1) 319 363 #define IORING_RECVSEND_FIXED_BUF (1U << 2) 320 364 #define IORING_SEND_ZC_REPORT_USAGE (1U << 3) 365 + #define IORING_RECVSEND_BUNDLE (1U << 4) 321 366 322 367 /* 323 368 * cqe.res for IORING_CQE_F_NOTIF if ··· 332 377 * accept flags stored in sqe->ioprio 333 378 */ 334 379 #define IORING_ACCEPT_MULTISHOT (1U << 0) 380 + #define IORING_ACCEPT_DONTWAIT (1U << 1) 381 + #define IORING_ACCEPT_POLL_FIRST (1U << 2) 335 382 336 383 /* 337 384 * IORING_OP_MSG_RING command types, stored in sqe->addr ··· 352 399 #define IORING_MSG_RING_FLAGS_PASS (1U << 1) 353 400 354 401 /* 402 + * IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags) 403 + * 404 + * IORING_FIXED_FD_NO_CLOEXEC Don't mark the fd as O_CLOEXEC 405 + */ 406 + #define IORING_FIXED_FD_NO_CLOEXEC (1U << 0) 407 + 408 + /* 409 + * IORING_OP_NOP flags (sqe->nop_flags) 410 + * 411 + * IORING_NOP_INJECT_RESULT Inject result from sqe->result 412 + */ 413 + 
#define IORING_NOP_INJECT_RESULT (1U << 0) 414 + 415 + /* 355 416 * IO completion data structure (Completion Queue Entry) 356 417 */ 357 418 struct io_uring_cqe { 358 - __u64 user_data; /* sqe->data submission passed back */ 419 + __u64 user_data; /* sqe->user_data value passed back */ 359 420 __s32 res; /* result code for this event */ 360 421 __u32 flags; 361 422 ··· 406 467 __u32 dropped; 407 468 __u32 array; 408 469 __u32 resv1; 409 - __u64 resv2; 470 + __u64 user_addr; 410 471 }; 411 472 412 473 /* ··· 425 486 __u32 cqes; 426 487 __u32 flags; 427 488 __u32 resv1; 428 - __u64 resv2; 489 + __u64 user_addr; 429 490 }; 430 491 431 492 /* ··· 477 538 #define IORING_FEAT_CQE_SKIP (1U << 11) 478 539 #define IORING_FEAT_LINKED_FILE (1U << 12) 479 540 #define IORING_FEAT_REG_REG_RING (1U << 13) 541 + #define IORING_FEAT_RECVSEND_BUNDLE (1U << 14) 480 542 481 543 /* 482 544 * io_uring_register(2) opcodes and arguments ··· 522 584 523 585 /* register a range of fixed file slots for automatic slot allocation */ 524 586 IORING_REGISTER_FILE_ALLOC_RANGE = 25, 587 + 588 + /* return status information for a buffer group */ 589 + IORING_REGISTER_PBUF_STATUS = 26, 590 + 591 + /* set/clear busy poll settings */ 592 + IORING_REGISTER_NAPI = 27, 593 + IORING_UNREGISTER_NAPI = 28, 525 594 526 595 /* this goes last */ 527 596 IORING_REGISTER_LAST, ··· 649 718 __u64 resv[3]; 650 719 }; 651 720 721 + /* argument for IORING_REGISTER_PBUF_STATUS */ 722 + struct io_uring_buf_status { 723 + __u32 buf_group; /* input */ 724 + __u32 head; /* output */ 725 + __u32 resv[8]; 726 + }; 727 + 728 + /* argument for IORING_(UN)REGISTER_NAPI */ 729 + struct io_uring_napi { 730 + __u32 busy_poll_to; 731 + __u8 prefer_busy_poll; 732 + __u8 pad[3]; 733 + __u64 resv; 734 + }; 735 + 652 736 /* 653 737 * io_uring_restriction->opcode values 654 738 */ ··· 701 785 __u32 controllen; 702 786 __u32 payloadlen; 703 787 __u32 flags; 788 + }; 789 + 790 + /* 791 + * Argument for IORING_OP_URING_CMD when file is a 
socket 792 + */ 793 + enum { 794 + SOCKET_URING_OP_SIOCINQ = 0, 795 + SOCKET_URING_OP_SIOCOUTQ, 796 + SOCKET_URING_OP_GETSOCKOPT, 797 + SOCKET_URING_OP_SETSOCKOPT, 704 798 }; 705 799 706 800 #ifdef __cplusplus

+1

vendor/liburing/src/int_flags.h

··· 5 5 enum { 6 6 INT_FLAG_REG_RING = 1, 7 7 INT_FLAG_REG_REG_RING = 2, 8 + INT_FLAG_APP_MEM = 4, 8 9 }; 9 10 10 11 #endif

+2

vendor/liburing/src/lib.h

··· 10 10 #include "arch/x86/lib.h" 11 11 #elif defined(__aarch64__) 12 12 #include "arch/aarch64/lib.h" 13 + #elif defined(__riscv) && __riscv_xlen == 64 14 + #include "arch/riscv64/lib.h" 13 15 #else 14 16 /* 15 17 * We don't have nolibc support for this arch. Must use libc!

+29

vendor/liburing/src/liburing-ffi.map

··· 172 172 io_uring_prep_msg_ring_fd; 173 173 io_uring_prep_msg_ring_fd_alloc; 174 174 io_uring_prep_sendto; 175 + io_uring_register_napi; /* Added in 2.6. */ 176 + io_uring_unregister_napi; /* Added in 2.6. */ 175 177 local: 176 178 *; 177 179 }; 180 + 181 + LIBURING_2.5 { 182 + global: 183 + io_uring_queue_init_mem; 184 + io_uring_prep_cmd_sock; /* Added in 2.5, 185 + exported in 2.6. */ 186 + io_uring_prep_read_multishot; /* Added in 2.6. */ 187 + io_uring_prep_waitid; /* Added in 2.6. */ 188 + io_uring_prep_futex_wake; /* Added in 2.6. */ 189 + io_uring_prep_futex_wait; /* Added in 2.6. */ 190 + io_uring_prep_futex_waitv; /* Added in 2.6. */ 191 + } LIBURING_2.4; 192 + 193 + LIBURING_2.6 { 194 + global: 195 + io_uring_prep_fixed_fd_install; 196 + io_uring_buf_ring_available; 197 + io_uring_prep_ftruncate; 198 + io_uring_prep_send_bundle; 199 + } LIBURING_2.5; 200 + 201 + LIBURING_2.7 { 202 + io_uring_prep_fadvise64; 203 + io_uring_prep_madvise64; 204 + io_uring_prep_bind; 205 + io_uring_prep_listen; 206 + } LIBURING_2.6;

+15

vendor/liburing/src/liburing.map

··· 80 80 io_uring_setup_buf_ring; 81 81 io_uring_free_buf_ring; 82 82 } LIBURING_2.3; 83 + 84 + LIBURING_2.5 { 85 + global: 86 + io_uring_queue_init_mem; 87 + } LIBURING_2.4; 88 + 89 + LIBURING_2.6 { 90 + global: 91 + io_uring_buf_ring_head; 92 + io_uring_register_napi; 93 + io_uring_unregister_napi; 94 + } LIBURING_2.5; 95 + 96 + LIBURING_2.7 { 97 + } LIBURING_2.6;

+8 -12

vendor/liburing/src/queue.c

··· 212 212 * Ensure kernel sees the SQE updates before the tail update. 213 213 */ 214 214 if (!(ring->flags & IORING_SETUP_SQPOLL)) 215 - IO_URING_WRITE_ONCE(*sq->ktail, tail); 215 + *sq->ktail = tail; 216 216 else 217 217 io_uring_smp_store_release(sq->ktail, tail); 218 218 } 219 219 /* 220 - * This _may_ look problematic, as we're not supposed to be reading 221 - * SQ->head without acquire semantics. When we're in SQPOLL mode, the 222 - * kernel submitter could be updating this right now. For non-SQPOLL, 223 - * task itself does it, and there's no potential race. But even for 224 - * SQPOLL, the load is going to be potentially out-of-date the very 225 - * instant it's done, regardless or whether or not it's done 226 - * atomically. Worst case, we're going to be over-estimating what 227 - * we can submit. The point is, we need to be able to deal with this 228 - * situation regardless of any perceived atomicity. 229 - */ 230 - return tail - *sq->khead; 220 + * This load needs to be atomic, since sq->khead is written concurrently 221 + * by the kernel, but it doesn't need to be load_acquire, since the 222 + * kernel doesn't store to the submission queue; it advances khead just 223 + * to indicate that it's finished reading the submission queue entries 224 + * so they're available for us to write to. 225 + */ 226 + return tail - IO_URING_READ_ONCE(*sq->khead); 231 227 } 232 228 233 229 /*

+32 -2

vendor/liburing/src/register.c

··· 11 11 static inline int do_register(struct io_uring *ring, unsigned int opcode, 12 12 const void *arg, unsigned int nr_args) 13 13 { 14 - if (ring->int_flags & INT_FLAG_REG_REG_RING) 14 + int fd; 15 + 16 + if (ring->int_flags & INT_FLAG_REG_REG_RING) { 15 17 opcode |= IORING_REGISTER_USE_REGISTERED_RING; 18 + fd = ring->enter_ring_fd; 19 + } else { 20 + fd = ring->ring_fd; 21 + } 16 22 17 - return __sys_io_uring_register(ring->enter_ring_fd, opcode, arg, nr_args); 23 + return __sys_io_uring_register(fd, opcode, arg, nr_args); 18 24 } 19 25 20 26 int io_uring_register_buffers_update_tag(struct io_uring *ring, unsigned off, ··· 320 326 return do_register(ring, IORING_UNREGISTER_PBUF_RING, &reg, 1); 321 327 } 322 328 329 + int io_uring_buf_ring_head(struct io_uring *ring, int buf_group, uint16_t *head) 330 + { 331 + struct io_uring_buf_status buf_status = { 332 + .buf_group = buf_group, 333 + }; 334 + int ret; 335 + 336 + ret = do_register(ring, IORING_REGISTER_PBUF_STATUS, &buf_status, 1); 337 + if (ret) 338 + return ret; 339 + *head = buf_status.head; 340 + return 0; 341 + } 342 + 323 343 int io_uring_register_sync_cancel(struct io_uring *ring, 324 344 struct io_uring_sync_cancel_reg *reg) 325 345 { ··· 336 356 337 357 return do_register(ring, IORING_REGISTER_FILE_ALLOC_RANGE, &range, 0); 338 358 } 359 + 360 + int io_uring_register_napi(struct io_uring *ring, struct io_uring_napi *napi) 361 + { 362 + return do_register(ring, IORING_REGISTER_NAPI, napi, 1); 363 + } 364 + 365 + int io_uring_unregister_napi(struct io_uring *ring, struct io_uring_napi *napi) 366 + { 367 + return do_register(ring, IORING_UNREGISTER_NAPI, napi, 1); 368 + }

+303 -85

vendor/liburing/src/setup.c

··· 5 5 #include "syscall.h" 6 6 #include "liburing.h" 7 7 #include "int_flags.h" 8 + #include "setup.h" 8 9 #include "liburing/compat.h" 9 10 #include "liburing/io_uring.h" 10 11 12 + #define KERN_MAX_ENTRIES 32768 13 + #define KERN_MAX_CQ_ENTRIES (2 * KERN_MAX_ENTRIES) 14 + 15 + static inline int __fls(int x) 16 + { 17 + if (!x) 18 + return 0; 19 + return 8 * sizeof(x) - __builtin_clz(x); 20 + } 21 + 22 + static unsigned roundup_pow2(unsigned depth) 23 + { 24 + return 1U << __fls(depth - 1); 25 + } 26 + 27 + static int get_sq_cq_entries(unsigned entries, struct io_uring_params *p, 28 + unsigned *sq, unsigned *cq) 29 + { 30 + unsigned cq_entries; 31 + 32 + if (!entries) 33 + return -EINVAL; 34 + if (entries > KERN_MAX_ENTRIES) { 35 + if (!(p->flags & IORING_SETUP_CLAMP)) 36 + return -EINVAL; 37 + entries = KERN_MAX_ENTRIES; 38 + } 39 + 40 + entries = roundup_pow2(entries); 41 + if (p->flags & IORING_SETUP_CQSIZE) { 42 + if (!p->cq_entries) 43 + return -EINVAL; 44 + cq_entries = p->cq_entries; 45 + if (cq_entries > KERN_MAX_CQ_ENTRIES) { 46 + if (!(p->flags & IORING_SETUP_CLAMP)) 47 + return -EINVAL; 48 + cq_entries = KERN_MAX_CQ_ENTRIES; 49 + } 50 + cq_entries = roundup_pow2(cq_entries); 51 + if (cq_entries < entries) 52 + return -EINVAL; 53 + } else { 54 + cq_entries = 2 * entries; 55 + } 56 + 57 + *sq = entries; 58 + *cq = cq_entries; 59 + return 0; 60 + } 61 + 11 62 static void io_uring_unmap_rings(struct io_uring_sq *sq, struct io_uring_cq *cq) 12 63 { 13 - __sys_munmap(sq->ring_ptr, sq->ring_sz); 14 - if (cq->ring_ptr && cq->ring_ptr != sq->ring_ptr) 64 + if (sq->ring_sz) 65 + __sys_munmap(sq->ring_ptr, sq->ring_sz); 66 + if (cq->ring_ptr && cq->ring_sz && cq->ring_ptr != sq->ring_ptr) 15 67 __sys_munmap(cq->ring_ptr, cq->ring_sz); 16 68 } 17 69 70 + static void io_uring_setup_ring_pointers(struct io_uring_params *p, 71 + struct io_uring_sq *sq, 72 + struct io_uring_cq *cq) 73 + { 74 + sq->khead = sq->ring_ptr + p->sq_off.head; 75 + sq->ktail = sq->ring_ptr + 
p->sq_off.tail; 76 + sq->kring_mask = sq->ring_ptr + p->sq_off.ring_mask; 77 + sq->kring_entries = sq->ring_ptr + p->sq_off.ring_entries; 78 + sq->kflags = sq->ring_ptr + p->sq_off.flags; 79 + sq->kdropped = sq->ring_ptr + p->sq_off.dropped; 80 + if (!(p->flags & IORING_SETUP_NO_SQARRAY)) 81 + sq->array = sq->ring_ptr + p->sq_off.array; 82 + 83 + cq->khead = cq->ring_ptr + p->cq_off.head; 84 + cq->ktail = cq->ring_ptr + p->cq_off.tail; 85 + cq->kring_mask = cq->ring_ptr + p->cq_off.ring_mask; 86 + cq->kring_entries = cq->ring_ptr + p->cq_off.ring_entries; 87 + cq->koverflow = cq->ring_ptr + p->cq_off.overflow; 88 + cq->cqes = cq->ring_ptr + p->cq_off.cqes; 89 + if (p->cq_off.flags) 90 + cq->kflags = cq->ring_ptr + p->cq_off.flags; 91 + 92 + sq->ring_mask = *sq->kring_mask; 93 + sq->ring_entries = *sq->kring_entries; 94 + cq->ring_mask = *cq->kring_mask; 95 + cq->ring_entries = *cq->kring_entries; 96 + } 97 + 18 98 static int io_uring_mmap(int fd, struct io_uring_params *p, 19 99 struct io_uring_sq *sq, struct io_uring_cq *cq) 20 100 { ··· 52 132 } 53 133 } 54 134 55 - sq->khead = sq->ring_ptr + p->sq_off.head; 56 - sq->ktail = sq->ring_ptr + p->sq_off.tail; 57 - sq->kring_mask = sq->ring_ptr + p->sq_off.ring_mask; 58 - sq->kring_entries = sq->ring_ptr + p->sq_off.ring_entries; 59 - sq->kflags = sq->ring_ptr + p->sq_off.flags; 60 - sq->kdropped = sq->ring_ptr + p->sq_off.dropped; 61 - sq->array = sq->ring_ptr + p->sq_off.array; 62 - 63 135 size = sizeof(struct io_uring_sqe); 64 136 if (p->flags & IORING_SETUP_SQE128) 65 137 size += 64; ··· 72 144 return ret; 73 145 } 74 146 75 - cq->khead = cq->ring_ptr + p->cq_off.head; 76 - cq->ktail = cq->ring_ptr + p->cq_off.tail; 77 - cq->kring_mask = cq->ring_ptr + p->cq_off.ring_mask; 78 - cq->kring_entries = cq->ring_ptr + p->cq_off.ring_entries; 79 - cq->koverflow = cq->ring_ptr + p->cq_off.overflow; 80 - cq->cqes = cq->ring_ptr + p->cq_off.cqes; 81 - if (p->cq_off.flags) 82 - cq->kflags = cq->ring_ptr + p->cq_off.flags; 83 
- 84 - sq->ring_mask = *sq->kring_mask; 85 - sq->ring_entries = *sq->kring_entries; 86 - cq->ring_mask = *cq->kring_mask; 87 - cq->ring_entries = *cq->kring_entries; 147 + io_uring_setup_ring_pointers(p, sq, cq); 88 148 return 0; 89 149 } 90 150 ··· 97 157 __cold int io_uring_queue_mmap(int fd, struct io_uring_params *p, 98 158 struct io_uring *ring) 99 159 { 100 - int ret; 101 - 102 160 memset(ring, 0, sizeof(*ring)); 103 - ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq); 104 - if (!ret) { 105 - ring->flags = p->flags; 106 - ring->ring_fd = ring->enter_ring_fd = fd; 107 - ring->int_flags = 0; 108 - return 0; 109 - } 110 - return ret; 161 + return io_uring_mmap(fd, p, &ring->sq, &ring->cq); 111 162 } 112 163 113 164 /* ··· 145 196 return 0; 146 197 } 147 198 148 - __cold int io_uring_queue_init_params(unsigned entries, struct io_uring *ring, 149 - struct io_uring_params *p) 199 + /* FIXME */ 200 + static size_t huge_page_size = 2 * 1024 * 1024; 201 + 202 + #define KRING_SIZE 64 203 + 204 + /* 205 + * Returns negative for error, or number of bytes used in the buffer on success 206 + */ 207 + static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p, 208 + struct io_uring_sq *sq, struct io_uring_cq *cq, 209 + void *buf, size_t buf_size) 150 210 { 151 - int fd, ret; 211 + unsigned long page_size = get_page_size(); 212 + unsigned sq_entries, cq_entries; 213 + size_t ring_mem, sqes_mem, cqes_mem; 214 + unsigned long mem_used = 0; 215 + void *ptr; 216 + int ret; 217 + 218 + ret = get_sq_cq_entries(entries, p, &sq_entries, &cq_entries); 219 + if (ret) 220 + return ret; 221 + 222 + ring_mem = KRING_SIZE; 223 + 224 + sqes_mem = sq_entries * sizeof(struct io_uring_sqe); 225 + sqes_mem = (sqes_mem + page_size - 1) & ~(page_size - 1); 226 + if (!(p->flags & IORING_SETUP_NO_SQARRAY)) 227 + sqes_mem += sq_entries * sizeof(unsigned); 228 + 229 + cqes_mem = cq_entries * sizeof(struct io_uring_cqe); 230 + if (p->flags & IORING_SETUP_CQE32) 231 + cqes_mem *= 2; 232 
+ ring_mem += sqes_mem + cqes_mem; 233 + mem_used = ring_mem; 234 + mem_used = (mem_used + page_size - 1) & ~(page_size - 1); 235 + 236 + /* 237 + * A maxed-out number of CQ entries with IORING_SETUP_CQE32 fills a 2MB 238 + * huge page by itself, so the SQ entries won't fit in the same huge 239 + * page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES, 240 + * but check that too to future-proof (e.g. against different huge page 241 + * sizes). Bail out early so we don't overrun. 242 + */ 243 + if (!buf && (sqes_mem > huge_page_size || ring_mem > huge_page_size)) 244 + return -ENOMEM; 245 + 246 + if (buf) { 247 + if (mem_used > buf_size) 248 + return -ENOMEM; 249 + ptr = buf; 250 + } else { 251 + int map_hugetlb = 0; 252 + if (sqes_mem <= page_size) 253 + buf_size = page_size; 254 + else { 255 + buf_size = huge_page_size; 256 + map_hugetlb = MAP_HUGETLB; 257 + } 258 + ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE, 259 + MAP_SHARED|MAP_ANONYMOUS|map_hugetlb, 260 + -1, 0); 261 + if (IS_ERR(ptr)) 262 + return PTR_ERR(ptr); 263 + } 264 + 265 + sq->sqes = ptr; 266 + if (mem_used <= buf_size) { 267 + sq->ring_ptr = (void *) sq->sqes + sqes_mem; 268 + /* clear ring sizes, we have just one mmap() to undo */ 269 + cq->ring_sz = 0; 270 + sq->ring_sz = 0; 271 + } else { 272 + int map_hugetlb = 0; 273 + if (ring_mem <= page_size) 274 + buf_size = page_size; 275 + else { 276 + buf_size = huge_page_size; 277 + map_hugetlb = MAP_HUGETLB; 278 + } 279 + ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE, 280 + MAP_SHARED|MAP_ANONYMOUS|map_hugetlb, 281 + -1, 0); 282 + if (IS_ERR(ptr)) { 283 + __sys_munmap(sq->sqes, 1); 284 + return PTR_ERR(ptr); 285 + } 286 + sq->ring_ptr = ptr; 287 + sq->ring_sz = buf_size; 288 + cq->ring_sz = 0; 289 + } 290 + 291 + cq->ring_ptr = (void *) sq->ring_ptr; 292 + p->sq_off.user_addr = (unsigned long) sq->sqes; 293 + p->cq_off.user_addr = (unsigned long) sq->ring_ptr; 294 + return (int) mem_used; 295 + } 296 + 297 + int 
__io_uring_queue_init_params(unsigned entries, struct io_uring *ring, 298 + struct io_uring_params *p, void *buf, 299 + size_t buf_size) 300 + { 301 + int fd, ret = 0; 152 302 unsigned *sq_array; 153 303 unsigned sq_entries, index; 154 304 305 + memset(ring, 0, sizeof(*ring)); 306 + 307 + /* 308 + * The kernel does this check already, but checking it here allows us 309 + * to avoid handling it below. 310 + */ 311 + if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY 312 + && !(p->flags & IORING_SETUP_NO_MMAP)) 313 + return -EINVAL; 314 + 315 + if (p->flags & IORING_SETUP_NO_MMAP) { 316 + ret = io_uring_alloc_huge(entries, p, &ring->sq, &ring->cq, 317 + buf, buf_size); 318 + if (ret < 0) 319 + return ret; 320 + if (buf) 321 + ring->int_flags |= INT_FLAG_APP_MEM; 322 + } 323 + 155 324 fd = __sys_io_uring_setup(entries, p); 156 - if (fd < 0) 325 + if (fd < 0) { 326 + if ((p->flags & IORING_SETUP_NO_MMAP) && 327 + !(ring->int_flags & INT_FLAG_APP_MEM)) { 328 + __sys_munmap(ring->sq.sqes, 1); 329 + io_uring_unmap_rings(&ring->sq, &ring->cq); 330 + } 157 331 return fd; 332 + } 158 333 159 - ret = io_uring_queue_mmap(fd, p, ring); 160 - if (ret) { 161 - __sys_close(fd); 162 - return ret; 334 + if (!(p->flags & IORING_SETUP_NO_MMAP)) { 335 + ret = io_uring_queue_mmap(fd, p, ring); 336 + if (ret) { 337 + __sys_close(fd); 338 + return ret; 339 + } 340 + } else { 341 + io_uring_setup_ring_pointers(p, &ring->sq, &ring->cq); 163 342 } 164 343 165 344 /* 166 345 * Directly map SQ slots to SQEs 167 346 */ 168 - sq_array = ring->sq.array; 169 347 sq_entries = ring->sq.ring_entries; 170 - for (index = 0; index < sq_entries; index++) 171 - sq_array[index] = index; 172 348 349 + if (!(p->flags & IORING_SETUP_NO_SQARRAY)) { 350 + sq_array = ring->sq.array; 351 + for (index = 0; index < sq_entries; index++) 352 + sq_array[index] = index; 353 + } 173 354 ring->features = p->features; 174 - return 0; 355 + ring->flags = p->flags; 356 + ring->enter_ring_fd = fd; 357 + if (p->flags & 
IORING_SETUP_REGISTERED_FD_ONLY) { 358 + ring->ring_fd = -1; 359 + ring->int_flags |= INT_FLAG_REG_RING | INT_FLAG_REG_REG_RING; 360 + } else { 361 + ring->ring_fd = fd; 362 + } 363 + 364 + return ret; 365 + } 366 + 367 + static int io_uring_queue_init_try_nosqarr(unsigned entries, struct io_uring *ring, 368 + struct io_uring_params *p, void *buf, 369 + size_t buf_size) 370 + { 371 + unsigned flags = p->flags; 372 + int ret; 373 + 374 + p->flags |= IORING_SETUP_NO_SQARRAY; 375 + ret = __io_uring_queue_init_params(entries, ring, p, buf, buf_size); 376 + 377 + /* don't fallback if explicitly asked for NOSQARRAY */ 378 + if (ret != -EINVAL || (flags & IORING_SETUP_NO_SQARRAY)) 379 + return ret; 380 + 381 + p->flags = flags; 382 + return __io_uring_queue_init_params(entries, ring, p, buf, buf_size); 383 + } 384 + 385 + /* 386 + * Like io_uring_queue_init_params(), except it allows the application to pass 387 + * in a pre-allocated memory range that is used for the shared data between 388 + * the kernel and the application. This includes the sqes array, and the two 389 + * rings. The memory must be contiguous, the use case here is that the app 390 + * allocates a huge page and passes it in. 391 + * 392 + * Returns the number of bytes used in the buffer, the app can then reuse 393 + * the buffer with the returned offset to put more rings in the same huge 394 + * page. Returns -ENOMEM if there's not enough room left in the buffer to 395 + * host the ring. 396 + */ 397 + int io_uring_queue_init_mem(unsigned entries, struct io_uring *ring, 398 + struct io_uring_params *p, 399 + void *buf, size_t buf_size) 400 + { 401 + /* should already be set... 
*/ 402 + p->flags |= IORING_SETUP_NO_MMAP; 403 + return io_uring_queue_init_try_nosqarr(entries, ring, p, buf, buf_size); 404 + } 405 + 406 + int io_uring_queue_init_params(unsigned entries, struct io_uring *ring, 407 + struct io_uring_params *p) 408 + { 409 + int ret; 410 + 411 + ret = io_uring_queue_init_try_nosqarr(entries, ring, p, NULL, 0); 412 + return ret >= 0 ? 0 : ret; 175 413 } 176 414 177 415 /* ··· 195 433 struct io_uring_cq *cq = &ring->cq; 196 434 size_t sqe_size; 197 435 198 - sqe_size = sizeof(struct io_uring_sqe); 199 - if (ring->flags & IORING_SETUP_SQE128) 200 - sqe_size += 64; 201 - __sys_munmap(sq->sqes, sqe_size * sq->ring_entries); 202 - io_uring_unmap_rings(sq, cq); 436 + if (!sq->ring_sz) { 437 + sqe_size = sizeof(struct io_uring_sqe); 438 + if (ring->flags & IORING_SETUP_SQE128) 439 + sqe_size += 64; 440 + __sys_munmap(sq->sqes, sqe_size * sq->ring_entries); 441 + io_uring_unmap_rings(sq, cq); 442 + } else { 443 + if (!(ring->int_flags & INT_FLAG_APP_MEM)) { 444 + __sys_munmap(sq->sqes, 445 + *sq->kring_entries * sizeof(struct io_uring_sqe)); 446 + io_uring_unmap_rings(sq, cq); 447 + } 448 + } 449 + 203 450 /* 204 451 * Not strictly required, but frees up the slot we used now rather 205 452 * than at process exit time. 
··· 250 497 free(probe); 251 498 } 252 499 253 - static inline int __fls(unsigned long x) 254 - { 255 - if (!x) 256 - return 0; 257 - return 8 * sizeof(x) - __builtin_clzl(x); 258 - } 259 - 260 - static unsigned roundup_pow2(unsigned depth) 261 - { 262 - return 1U << __fls(depth - 1); 263 - } 264 - 265 500 static size_t npages(size_t size, long page_size) 266 501 { 267 502 size--; 268 503 size /= page_size; 269 504 return __fls((int) size); 270 505 } 271 - 272 - #define KRING_SIZE 320 273 506 274 507 static size_t rings_size(struct io_uring_params *p, unsigned entries, 275 508 unsigned cq_entries, long page_size) ··· 291 524 pages += (size_t) 1 << npages(sq_size, page_size); 292 525 return pages * page_size; 293 526 } 294 - 295 - #define KERN_MAX_ENTRIES 32768 296 - #define KERN_MAX_CQ_ENTRIES (2 * KERN_MAX_ENTRIES) 297 527 298 528 /* 299 529 * Return the required ulimit -l memlock memory required for a given ring ··· 308 538 { 309 539 struct io_uring_params lp; 310 540 struct io_uring ring; 311 - unsigned cq_entries; 541 + unsigned cq_entries, sq; 312 542 long page_size; 313 543 ssize_t ret; 544 + int cret; 314 545 315 546 memset(&lp, 0, sizeof(lp)); 316 547 ··· 340 571 entries = KERN_MAX_ENTRIES; 341 572 } 342 573 343 - entries = roundup_pow2(entries); 344 - if (p->flags & IORING_SETUP_CQSIZE) { 345 - if (!p->cq_entries) 346 - return -EINVAL; 347 - cq_entries = p->cq_entries; 348 - if (cq_entries > KERN_MAX_CQ_ENTRIES) { 349 - if (!(p->flags & IORING_SETUP_CLAMP)) 350 - return -EINVAL; 351 - cq_entries = KERN_MAX_CQ_ENTRIES; 352 - } 353 - cq_entries = roundup_pow2(cq_entries); 354 - if (cq_entries < entries) 355 - return -EINVAL; 356 - } else { 357 - cq_entries = 2 * entries; 358 - } 574 + cret = get_sq_cq_entries(entries, p, &sq, &cq_entries); 575 + if (cret) 576 + return cret; 359 577 360 578 page_size = get_page_size(); 361 - return rings_size(p, entries, cq_entries, page_size); 579 + return rings_size(p, sq, cq_entries, page_size); 362 580 } 363 581 364 582 /*

+9

vendor/liburing/src/setup.h

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + #ifndef LIBURING_SETUP_H 3 + #define LIBURING_SETUP_H 4 + 5 + int __io_uring_queue_init_params(unsigned entries, struct io_uring *ring, 6 + struct io_uring_params *p, void *buf, 7 + size_t buf_size); 8 + 9 + #endif

+2

vendor/liburing/src/syscall.h

··· 37 37 #include "arch/x86/syscall.h" 38 38 #elif defined(__aarch64__) 39 39 #include "arch/aarch64/syscall.h" 40 + #elif defined(__riscv) && __riscv_xlen == 64 41 + #include "arch/riscv64/syscall.h" 40 42 #else 41 43 /* 42 44 * We don't have native syscall wrappers

+1 -1

vendor/liburing/src/version.c

··· 17 17 { 18 18 return major > io_uring_major_version() || 19 19 (major == io_uring_major_version() && 20 - minor >= io_uring_minor_version()); 20 + minor > io_uring_minor_version()); 21 21 }

+3 -3

vendor/liburing/test/232c93d07b74.c

··· 64 64 int res; 65 65 66 66 if (p->tcp) { 67 - int val = 1; 68 - 67 + int ret, val = 1; 69 68 70 69 s0 = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); 71 70 res = setsockopt(s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); ··· 77 76 78 77 addr.sin_family = AF_INET; 79 78 addr.sin_addr.s_addr = inet_addr("127.0.0.1"); 80 - assert(t_bind_ephemeral_port(s0, &addr) == 0); 79 + ret = t_bind_ephemeral_port(s0, &addr); 80 + assert(!ret); 81 81 p->bind_port = addr.sin_port; 82 82 } else { 83 83 s0 = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);

+33 -5

vendor/liburing/test/Makefile

··· 46 46 a4c0b3decb33.c \ 47 47 accept.c \ 48 48 accept-link.c \ 49 + accept-non-empty.c \ 49 50 accept-reuse.c \ 50 51 accept-test.c \ 51 52 across-fork.c \ 52 53 b19062a56726.c \ 53 54 b5837bd5311d.c \ 55 + bind-listen.c \ 54 56 buf-ring.c \ 57 + buf-ring-nommap.c \ 58 + buf-ring-put.c \ 55 59 ce593a6c480a.c \ 56 60 close-opath.c \ 57 61 connect.c \ 58 62 connect-rep.c \ 63 + coredump.c \ 59 64 cq-full.c \ 60 65 cq-overflow.c \ 61 66 cq-peek-batch.c \ ··· 65 70 d77a67ed5f27.c \ 66 71 defer.c \ 67 72 defer-taskrun.c \ 73 + defer-tw-timeout.c \ 68 74 double-poll-crash.c \ 69 75 drop-submit.c \ 70 76 eeed8b54e0df.c \ ··· 80 86 fadvise.c \ 81 87 fallocate.c \ 82 88 fc2a85cb02ef.c \ 89 + fd-install.c \ 83 90 fd-pass.c \ 84 91 file-register.c \ 85 92 files-exit-hang-poll.c \ ··· 87 94 file-update.c \ 88 95 file-verify.c \ 89 96 fixed-buf-iter.c \ 97 + fixed-buf-merge.c \ 98 + fixed-hugepage.c \ 90 99 fixed-link.c \ 91 100 fixed-reuse.c \ 92 101 fpos.c \ 93 102 fsnotify.c \ 94 103 fsync.c \ 104 + futex.c \ 95 105 hardlink.c \ 106 + ignore-single-mmap.c \ 107 + init-mem.c \ 96 108 io-cancel.c \ 97 109 iopoll.c \ 98 110 iopoll-leak.c \ ··· 109 121 madvise.c \ 110 122 mkdir.c \ 111 123 msg-ring.c \ 124 + msg-ring-fd.c \ 112 125 msg-ring-flags.c \ 113 126 msg-ring-overflow.c \ 114 127 multicqes_drain.c \ 128 + no-mmap-inval.c \ 115 129 nolibc.c \ 116 130 nop-all-sizes.c \ 117 131 nop.c \ 132 + ooo-file-unreg.c \ 118 133 openat2.c \ 119 134 open-close.c \ 120 135 open-direct-link.c \ ··· 135 150 poll-race-mshot.c \ 136 151 poll-ring.c \ 137 152 poll-v-poll.c \ 138 - pollfree.c \ 139 153 probe.c \ 140 154 read-before-exit.c \ 155 + read-mshot.c \ 156 + read-mshot-empty.c \ 141 157 read-write.c \ 142 158 recv-msgall.c \ 143 159 recv-msgall-stream.c \ 144 160 recv-multishot.c \ 161 + reg-fd-only.c \ 145 162 reg-hint.c \ 146 163 reg-reg-ring.c \ 147 164 regbuf-merge.c \ 148 165 register-restrictions.c \ 149 166 rename.c \ 150 167 ringbuf-read.c \ 168 + ringbuf-status.c \ 151 
169 ring-leak2.c \ 152 170 ring-leak.c \ 153 171 rsrc_tags.c \ 154 172 rw_merge_test.c \ 155 173 self.c \ 156 - sendmsg_fs_cve.c \ 174 + recvsend_bundle.c \ 157 175 send_recv.c \ 158 176 send_recvmsg.c \ 159 177 send-zerocopy.c \ ··· 164 182 single-issuer.c \ 165 183 skip-cqe.c \ 166 184 socket.c \ 185 + socket-io-cmd.c \ 186 + socket-getsetsock-cmd.c \ 167 187 socket-rw.c \ 168 188 socket-rw-eagain.c \ 169 189 socket-rw-offset.c \ 170 190 splice.c \ 171 191 sq-full.c \ 172 192 sq-full-cpp.cc \ 173 - sqpoll-cancel-hang.c \ 174 193 sqpoll-disable-exit.c \ 194 + sqpoll-exec.c \ 175 195 sq-poll-dup.c \ 176 196 sqpoll-exit-hang.c \ 177 197 sq-poll-kthread.c \ ··· 188 208 thread-exit.c \ 189 209 timeout.c \ 190 210 timeout-new.c \ 211 + truncate.c \ 191 212 tty-write-dpoll.c \ 192 213 unlink.c \ 193 214 version.c \ 215 + waitid.c \ 194 216 wakeup-hang.c \ 217 + wq-aff.c \ 195 218 xattr.c \ 196 219 # EOL 197 220 ··· 223 246 helpers.o: helpers.c 224 247 $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $< 225 248 226 - %.t: %.c $(helpers) helpers.h ../src/liburing.a 249 + LIBURING := $(shell if [ -e ../src/liburing.a ]; then echo ../src/liburing.a; fi) 250 + 251 + %.t: %.c $(helpers) helpers.h $(LIBURING) 227 252 $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< $(helpers) $(LDFLAGS) 228 253 229 254 # ··· 232 257 # cc1plus: warning: command-line option '-Wmissing-prototypes' \ 233 258 # is valid for C/ObjC but not for C++ 234 259 # 235 - %.t: %.cc $(helpers) helpers.h ../src/liburing.a 260 + %.t: %.cc $(helpers) helpers.h $(LIBURING) 236 261 $(QUIET_CXX)$(CXX) \ 237 262 $(patsubst -Wmissing-prototypes,,$(CPPFLAGS)) \ 238 263 $(patsubst -Wmissing-prototypes,,$(CXXFLAGS)) \ ··· 244 269 $(INSTALL) -D -m 755 $(test_targets) $(datadir)/liburing-test/ 245 270 $(INSTALL) -D -m 755 runtests.sh $(datadir)/liburing-test/ 246 271 $(INSTALL) -D -m 755 runtests-loop.sh $(datadir)/liburing-test/ 272 + 273 + uninstall: 274 + @rm -rf $(datadir)/liburing-test/ 247 275 248 276 clean: 249 277 
@rm -f $(all_targets) helpers.o output/*

+1 -1

vendor/liburing/test/a4c0b3decb33.c

··· 109 109 static void loop(void) 110 110 { 111 111 int iter; 112 - for (iter = 0; iter < 5000; iter++) { 112 + for (iter = 0; iter < 50; iter++) { 113 113 int pid = fork(); 114 114 if (pid < 0) 115 115 exit(1);

+256

vendor/liburing/test/accept-non-empty.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Check that kernels that support it will return IORING_CQE_F_SOCK_NONEMPTY 4 + * on accepts requests where more connections are pending. 5 + */ 6 + #include <stdio.h> 7 + #include <stdlib.h> 8 + #include <stdint.h> 9 + #include <assert.h> 10 + 11 + #include <errno.h> 12 + #include <fcntl.h> 13 + #include <unistd.h> 14 + #include <sys/socket.h> 15 + #include <sys/time.h> 16 + #include <sys/resource.h> 17 + #include <sys/un.h> 18 + #include <netinet/tcp.h> 19 + #include <netinet/in.h> 20 + #include <arpa/inet.h> 21 + #include <pthread.h> 22 + 23 + #include "liburing.h" 24 + #include "helpers.h" 25 + 26 + static int no_more_accept; 27 + 28 + #define MAX_ACCEPTS 8 29 + 30 + struct data { 31 + pthread_t thread; 32 + pthread_barrier_t barrier; 33 + pthread_barrier_t conn_barrier; 34 + int connects; 35 + }; 36 + 37 + static int start_accept_listen(int port_off, int extra_flags) 38 + { 39 + struct sockaddr_in addr; 40 + int32_t val = 1; 41 + int fd, ret; 42 + 43 + fd = socket(AF_INET, SOCK_STREAM | extra_flags, IPPROTO_TCP); 44 + 45 + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); 46 + assert(ret != -1); 47 + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); 48 + assert(ret != -1); 49 + 50 + addr.sin_family = AF_INET; 51 + addr.sin_port = htons(0x1235 + port_off); 52 + addr.sin_addr.s_addr = inet_addr("127.0.0.1"); 53 + 54 + ret = bind(fd, (struct sockaddr *) &addr, sizeof(addr)); 55 + assert(ret != -1); 56 + ret = listen(fd, 20000); 57 + assert(ret != -1); 58 + 59 + return fd; 60 + } 61 + 62 + static int test_maccept(struct data *d, int flags, int fixed) 63 + { 64 + struct io_uring_params p = { }; 65 + struct io_uring ring; 66 + struct io_uring_cqe *cqe; 67 + struct io_uring_sqe *sqe; 68 + int err = 0, fd, ret, i, *fds; 69 + 70 + p.flags = flags; 71 + ret = io_uring_queue_init_params(8, &ring, &p); 72 + if (ret == -EINVAL) { 73 + return T_EXIT_SKIP; 74 + } else if (ret < 0) { 75 + 
fprintf(stderr, "ring setup failure: %d\n", ret); 76 + return T_EXIT_FAIL; 77 + } 78 + 79 + if (!(p.features & IORING_FEAT_RECVSEND_BUNDLE)) { 80 + no_more_accept = 1; 81 + return 0; 82 + } 83 + 84 + fds = malloc(MAX_ACCEPTS * sizeof(int)); 85 + memset(fds, -1, MAX_ACCEPTS * sizeof(int)); 86 + 87 + if (fixed) { 88 + io_uring_register_ring_fd(&ring); 89 + 90 + ret = io_uring_register_files(&ring, fds, MAX_ACCEPTS); 91 + if (ret) { 92 + fprintf(stderr, "file reg %d\n", ret); 93 + return -1; 94 + } 95 + } 96 + 97 + fd = start_accept_listen(0, 0); 98 + 99 + pthread_barrier_wait(&d->barrier); 100 + 101 + if (d->connects > 1) 102 + pthread_barrier_wait(&d->conn_barrier); 103 + 104 + for (i = 0; i < d->connects; i++) { 105 + sqe = io_uring_get_sqe(&ring); 106 + if (fixed) 107 + io_uring_prep_accept_direct(sqe, fd, NULL, NULL, 0, i); 108 + else 109 + io_uring_prep_accept(sqe, fd, NULL, NULL, 0); 110 + 111 + ret = io_uring_submit_and_wait(&ring, 1); 112 + assert(ret != -1); 113 + 114 + ret = io_uring_wait_cqe(&ring, &cqe); 115 + assert(!ret); 116 + if (cqe->res < 0) { 117 + fprintf(stderr, "res=%d\n", cqe->res); 118 + break; 119 + } 120 + fds[i] = cqe->res; 121 + if (d->connects == 1) { 122 + if (cqe->flags & IORING_CQE_F_SOCK_NONEMPTY) { 123 + fprintf(stderr, "Non-empty sock on single?\n"); 124 + err = 1; 125 + break; 126 + } 127 + } else { 128 + int last = i + 1 == d->connects; 129 + 130 + if (last && cqe->flags & IORING_CQE_F_SOCK_NONEMPTY) { 131 + fprintf(stderr, "Non-empty sock on last?\n"); 132 + err = 1; 133 + break; 134 + } else if (!last && !(cqe->flags & IORING_CQE_F_SOCK_NONEMPTY)) { 135 + fprintf(stderr, "Empty on multi connect?\n"); 136 + err = 1; 137 + break; 138 + } 139 + } 140 + io_uring_cqe_seen(&ring, cqe); 141 + } 142 + 143 + close(fd); 144 + if (!fixed) { 145 + for (i = 0; i < MAX_ACCEPTS; i++) 146 + if (fds[i] != -1) 147 + close(fds[i]); 148 + } 149 + free(fds); 150 + io_uring_queue_exit(&ring); 151 + return err; 152 + } 153 + 154 + static void 
*connect_fn(void *data) 155 + { 156 + struct sockaddr_in addr = { }; 157 + struct data *d = data; 158 + int i; 159 + 160 + pthread_barrier_wait(&d->barrier); 161 + 162 + addr.sin_family = AF_INET; 163 + addr.sin_port = htons(0x1235); 164 + addr.sin_addr.s_addr = inet_addr("127.0.0.1"); 165 + 166 + for (i = 0; i < d->connects; i++) { 167 + int s; 168 + 169 + s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 170 + if (s < 0) { 171 + perror("socket"); 172 + break; 173 + } 174 + if (connect(s, (struct sockaddr *) &addr, sizeof(addr)) < 0) { 175 + perror("connect"); 176 + break; 177 + } 178 + } 179 + 180 + if (d->connects > 1) 181 + pthread_barrier_wait(&d->conn_barrier); 182 + 183 + return NULL; 184 + } 185 + 186 + static void setup_thread(struct data *d, int nconns) 187 + { 188 + d->connects = nconns; 189 + pthread_barrier_init(&d->barrier, NULL, 2); 190 + pthread_barrier_init(&d->conn_barrier, NULL, 2); 191 + pthread_create(&d->thread, NULL, connect_fn, d); 192 + } 193 + 194 + static int test(int flags, int fixed) 195 + { 196 + struct data d; 197 + void *tret; 198 + int ret; 199 + 200 + setup_thread(&d, 1); 201 + ret = test_maccept(&d, flags, fixed); 202 + if (ret) { 203 + fprintf(stderr, "test conns=1 failed\n"); 204 + return ret; 205 + } 206 + if (no_more_accept) 207 + return T_EXIT_SKIP; 208 + 209 + pthread_join(d.thread, &tret); 210 + 211 + setup_thread(&d, MAX_ACCEPTS); 212 + ret = test_maccept(&d, flags, fixed); 213 + if (ret) { 214 + fprintf(stderr, "test conns=MAX failed\n"); 215 + return ret; 216 + } 217 + 218 + pthread_join(d.thread, &tret); 219 + return 0; 220 + } 221 + 222 + int main(int argc, char *argv[]) 223 + { 224 + int ret; 225 + 226 + if (argc > 1) 227 + return T_EXIT_SKIP; 228 + 229 + ret = test(0, 0); 230 + if (no_more_accept) 231 + return T_EXIT_SKIP; 232 + if (ret) { 233 + fprintf(stderr, "test 0 0 failed\n"); 234 + return ret; 235 + } 236 + 237 + ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN, 0); 238 + if (ret) { 239 + 
fprintf(stderr, "test DEFER 0 failed\n"); 240 + return ret; 241 + } 242 + 243 + ret = test(0, 1); 244 + if (ret) { 245 + fprintf(stderr, "test 0 1 failed\n"); 246 + return ret; 247 + } 248 + 249 + ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN, 1); 250 + if (ret) { 251 + fprintf(stderr, "test DEFER 1 failed\n"); 252 + return ret; 253 + } 254 + 255 + return 0; 256 + }

+1 -1

vendor/liburing/test/accept-reuse.c

··· 45 45 return T_EXIT_SKIP; 46 46 47 47 memset(&params, 0, sizeof(params)); 48 - ret = io_uring_queue_init_params(4, &io_uring, &params); 48 + ret = t_io_uring_init_sqarray(4, &io_uring, &params); 49 49 if (ret) { 50 50 fprintf(stderr, "io_uring_init_failed: %d\n", ret); 51 51 return T_EXIT_FAIL;

+22 -5

vendor/liburing/test/accept.c

··· 195 195 196 196 addr->sin_family = AF_INET; 197 197 addr->sin_addr.s_addr = inet_addr("127.0.0.1"); 198 - assert(!t_bind_ephemeral_port(fd, addr)); 198 + ret = t_bind_ephemeral_port(fd, addr); 199 + assert(!ret); 199 200 ret = listen(fd, 128); 200 201 assert(ret != -1); 201 202 ··· 309 310 fixed ? "Fixed" : "", 310 311 multishot ? "Multishot" : "", 311 312 i, s_fd[i]); 313 + goto err; 314 + } else if (s_fd[i] == 195 && args.overflow) { 315 + fprintf(stderr, "Broken overflow handling\n"); 312 316 goto err; 313 317 } 314 318 ··· 427 431 }; 428 432 429 433 /* 430 - * Test issue many accepts and see if we handle cancellation on exit 434 + * Test issue many accepts and see if we handle cancelation on exit 431 435 */ 432 436 static int test_accept_many(struct test_accept_many_args args) 433 437 { ··· 482 486 if (io_uring_peek_cqe(&m_io_uring, &cqe)) 483 487 break; 484 488 if (cqe->res != -ECANCELED) { 485 - fprintf(stderr, "Expected cqe to be cancelled %d\n", cqe->res); 489 + fprintf(stderr, "Expected cqe to be canceled %d\n", cqe->res); 486 490 ret = 1; 487 491 goto out; 488 492 } ··· 554 558 fprintf(stderr, "unexpected 0 user data\n"); 555 559 goto err; 556 560 } else if (cqe->user_data <= nr) { 561 + /* no multishot */ 562 + if (cqe->res == -EINVAL) 563 + return T_EXIT_SKIP; 557 564 if (cqe->res != -EINTR && cqe->res != -ECANCELED) { 558 565 fprintf(stderr, "Cancelled accept got %d\n", cqe->res); 559 566 goto err; ··· 678 685 ret = io_uring_queue_init(32, &m_io_uring, 0); 679 686 assert(ret >= 0); 680 687 ret = io_uring_register_files(&m_io_uring, &fd, 1); 681 - assert(ret == 0); 688 + if (ret) { 689 + /* kernel doesn't support sparse registered files, skip */ 690 + if (ret == -EBADF || ret == -EINVAL) 691 + return T_EXIT_SKIP; 692 + return T_EXIT_FAIL; 693 + } 682 694 ret = test(&m_io_uring, args); 683 695 io_uring_queue_exit(&m_io_uring); 684 696 return ret; ··· 700 712 ret = io_uring_queue_init(MAX_FDS + 10, &m_io_uring, 0); 701 713 assert(ret >= 0); 702 714 
ret = io_uring_register_files(&m_io_uring, fd, MAX_FDS); 703 - assert(ret == 0); 715 + if (ret) { 716 + /* kernel doesn't support sparse registered files, skip */ 717 + if (ret == -EBADF || ret == -EINVAL) 718 + return T_EXIT_SKIP; 719 + return T_EXIT_FAIL; 720 + } 704 721 ret = test(&m_io_uring, args); 705 722 io_uring_queue_exit(&m_io_uring); 706 723 return ret;

+408

vendor/liburing/test/bind-listen.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Configure and operate a TCP socket solely with io_uring. 4 + */ 5 + #include <stdio.h> 6 + #include <string.h> 7 + #include <liburing.h> 8 + #include <err.h> 9 + #include <sys/mman.h> 10 + #include <sys/wait.h> 11 + #include <sys/socket.h> 12 + #include <unistd.h> 13 + #include <stdlib.h> 14 + #include <netinet/ip.h> 15 + #include "liburing.h" 16 + #include "helpers.h" 17 + 18 + static void msec_to_ts(struct __kernel_timespec *ts, unsigned int msec) 19 + { 20 + ts->tv_sec = msec / 1000; 21 + ts->tv_nsec = (msec % 1000) * 1000000; 22 + } 23 + 24 + static const char *magic = "Hello World!"; 25 + static int use_port = 8000; 26 + 27 + enum { 28 + SRV_INDEX = 0, 29 + CLI_INDEX, 30 + CONN_INDEX, 31 + }; 32 + 33 + static int connect_client(struct io_uring *ring, unsigned short peer_port) 34 + { 35 + struct __kernel_timespec ts; 36 + struct io_uring_sqe *sqe; 37 + struct io_uring_cqe *cqe; 38 + int head, ret, submitted = 0; 39 + struct sockaddr_in peer_addr; 40 + socklen_t addr_len = sizeof(peer_addr); 41 + 42 + peer_addr.sin_family = AF_INET; 43 + peer_addr.sin_port = peer_port; 44 + peer_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 45 + 46 + sqe = io_uring_get_sqe(ring); 47 + io_uring_prep_socket_direct(sqe, AF_INET, SOCK_STREAM, 0, 48 + CLI_INDEX, 0); 49 + sqe->flags |= IOSQE_IO_LINK; 50 + 51 + sqe = io_uring_get_sqe(ring); 52 + io_uring_prep_connect(sqe, CLI_INDEX, (struct sockaddr*) &peer_addr, addr_len); 53 + sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; 54 + 55 + sqe = io_uring_get_sqe(ring); 56 + io_uring_prep_send(sqe, CLI_INDEX, magic, strlen(magic), 0); 57 + sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; 58 + 59 + submitted = ret = io_uring_submit(ring); 60 + if (ret < 0) 61 + return T_SETUP_SKIP; 62 + 63 + msec_to_ts(&ts, 300); 64 + ret = io_uring_wait_cqes(ring, &cqe, submitted, &ts, NULL); 65 + if (ret < 0) 66 + return T_SETUP_SKIP; 67 + 68 + io_uring_for_each_cqe(ring, head, cqe) { 69 + ret = 
cqe->res; 70 + if (ret < 0) 71 + return T_SETUP_SKIP; 72 + } io_uring_cq_advance(ring, submitted); 73 + 74 + return T_SETUP_OK; 75 + } 76 + 77 + static int setup_srv(struct io_uring *ring, struct sockaddr_in *server_addr) 78 + { 79 + struct io_uring_sqe *sqe; 80 + struct io_uring_cqe *cqe; 81 + struct __kernel_timespec ts; 82 + int ret, val, submitted; 83 + unsigned head; 84 + 85 + memset(server_addr, 0, sizeof(struct sockaddr_in)); 86 + server_addr->sin_family = AF_INET; 87 + server_addr->sin_port = htons(use_port++); 88 + server_addr->sin_addr.s_addr = htons(INADDR_ANY); 89 + 90 + sqe = io_uring_get_sqe(ring); 91 + io_uring_prep_socket_direct(sqe, AF_INET, SOCK_STREAM, 0, SRV_INDEX, 0); 92 + sqe->flags |= IOSQE_IO_LINK; 93 + 94 + sqe = io_uring_get_sqe(ring); 95 + val = 1; 96 + io_uring_prep_cmd_sock(sqe, SOCKET_URING_OP_SETSOCKOPT, 0, SOL_SOCKET, 97 + SO_REUSEADDR, &val, sizeof(val)); 98 + sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; 99 + 100 + sqe = io_uring_get_sqe(ring); 101 + io_uring_prep_bind(sqe, SRV_INDEX, (struct sockaddr *) server_addr, 102 + sizeof(struct sockaddr_in)); 103 + sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; 104 + 105 + sqe = io_uring_get_sqe(ring); 106 + io_uring_prep_listen(sqe, SRV_INDEX, 1); 107 + sqe->flags |= IOSQE_FIXED_FILE; 108 + 109 + submitted = ret = io_uring_submit(ring); 110 + if (ret < 0) { 111 + fprintf(stderr, "submission failed. %d\n", ret); 112 + return T_EXIT_FAIL; 113 + } 114 + 115 + msec_to_ts(&ts, 300); 116 + ret = io_uring_wait_cqes(ring, &cqe, ret, &ts, NULL); 117 + if (ret < 0) { 118 + fprintf(stderr, "submission failed. %d\n", ret); 119 + return T_EXIT_FAIL; 120 + } 121 + 122 + io_uring_for_each_cqe(ring, head, cqe) { 123 + ret = cqe->res; 124 + if (ret < 0) { 125 + fprintf(stderr, "Server startup failed. 
step %d got %d \n", head, ret); 126 + return T_EXIT_FAIL; 127 + } 128 + } io_uring_cq_advance(ring, submitted); 129 + 130 + return T_SETUP_OK; 131 + } 132 + 133 + static int test_good_server(unsigned int ring_flags) 134 + { 135 + struct sockaddr_in server_addr; 136 + struct __kernel_timespec ts; 137 + struct io_uring_sqe *sqe; 138 + struct io_uring_cqe *cqe; 139 + struct io_uring ring; 140 + int ret; 141 + int fds[3]; 142 + char buf[1024]; 143 + 144 + memset(fds, -1, sizeof(fds)); 145 + 146 + ret = t_create_ring(10, &ring, ring_flags | IORING_SETUP_SUBMIT_ALL); 147 + if (ret < 0) { 148 + fprintf(stderr, "queue_init: %s\n", strerror(-ret)); 149 + return T_SETUP_SKIP; 150 + } 151 + 152 + ret = io_uring_register_files(&ring, fds, 3); 153 + if (ret) { 154 + fprintf(stderr, "server file register %d\n", ret); 155 + return T_SETUP_SKIP; 156 + } 157 + 158 + ret = setup_srv(&ring, &server_addr); 159 + if (ret != T_SETUP_OK) { 160 + fprintf(stderr, "srv startup failed.\n"); 161 + return T_EXIT_FAIL; 162 + } 163 + 164 + if (connect_client(&ring, server_addr.sin_port) != T_SETUP_OK) { 165 + fprintf(stderr, "cli startup failed.\n"); 166 + return T_SETUP_SKIP; 167 + } 168 + 169 + /* Wait for a request */ 170 + sqe = io_uring_get_sqe(&ring); 171 + io_uring_prep_accept_direct(sqe, SRV_INDEX, NULL, NULL, 0, CONN_INDEX); 172 + sqe->flags |= IOSQE_FIXED_FILE; 173 + 174 + io_uring_submit(&ring); 175 + io_uring_wait_cqe(&ring, &cqe); 176 + if (cqe->res < 0) { 177 + fprintf(stderr, "accept failed. %d\n", cqe->res); 178 + return T_EXIT_FAIL; 179 + } 180 + io_uring_cqe_seen(&ring, cqe); 181 + 182 + sqe = io_uring_get_sqe(&ring); 183 + io_uring_prep_recv(sqe, CONN_INDEX, buf, BUFSIZ, 0); 184 + sqe->flags |= IOSQE_FIXED_FILE; 185 + 186 + io_uring_submit(&ring); 187 + io_uring_wait_cqe_timeout(&ring, &cqe, &ts); 188 + 189 + if (cqe->res < 0) { 190 + fprintf(stderr, "bad receive cqe. 
%d\n", cqe->res); 191 + return T_EXIT_FAIL; 192 + } 193 + ret = cqe->res; 194 + io_uring_cqe_seen(&ring, cqe); 195 + 196 + io_uring_queue_exit(&ring); 197 + 198 + if (ret != strlen(magic) || strncmp(buf, magic, ret)) { 199 + fprintf(stderr, "didn't receive expected string. Got %d '%s'\n", ret, buf); 200 + return T_EXIT_FAIL; 201 + } 202 + 203 + return T_EXIT_PASS; 204 + } 205 + 206 + static int test_bad_bind(void) 207 + { 208 + struct sockaddr_in server_addr; 209 + struct io_uring_sqe *sqe; 210 + struct io_uring_cqe *cqe; 211 + struct io_uring ring; 212 + int sock = -1, err; 213 + int ret = T_EXIT_FAIL; 214 + 215 + memset(&server_addr, 0, sizeof(struct sockaddr_in)); 216 + server_addr.sin_family = AF_INET; 217 + server_addr.sin_port = htons(9001); 218 + server_addr.sin_addr.s_addr = htons(INADDR_ANY); 219 + 220 + err = t_create_ring(1, &ring, 0); 221 + if (err < 0) { 222 + fprintf(stderr, "queue_init: %s\n", strerror(-ret)); 223 + return T_SETUP_SKIP; 224 + } 225 + 226 + sock = socket(AF_INET, SOCK_STREAM, 0); 227 + if (sock < 0) { 228 + perror("socket"); 229 + goto fail; 230 + } 231 + 232 + /* Bind with size 0 */ 233 + sqe = io_uring_get_sqe(&ring); 234 + io_uring_prep_bind(sqe, sock, (struct sockaddr *) &server_addr, 0); 235 + err = io_uring_submit(&ring); 236 + if (err < 0) 237 + goto fail; 238 + 239 + err = io_uring_wait_cqe(&ring, &cqe); 240 + if (err) 241 + goto fail; 242 + 243 + if (cqe->res != -EINVAL) 244 + goto fail; 245 + io_uring_cqe_seen(&ring, cqe); 246 + 247 + /* Bind with bad fd */ 248 + sqe = io_uring_get_sqe(&ring); 249 + io_uring_prep_bind(sqe, 0, (struct sockaddr *) &server_addr, sizeof(struct sockaddr_in)); 250 + err = io_uring_submit(&ring); 251 + if (err < 0) 252 + goto fail; 253 + 254 + err = io_uring_wait_cqe(&ring, &cqe); 255 + if (err) 256 + goto fail; 257 + if (cqe->res != -ENOTSOCK) 258 + goto fail; 259 + io_uring_cqe_seen(&ring, cqe); 260 + 261 + ret = T_EXIT_PASS; 262 + 263 + /* bind with weird value */ 264 + sqe = 
io_uring_get_sqe(&ring); 265 + io_uring_prep_bind(sqe, sock, (struct sockaddr *) &server_addr, sizeof(struct sockaddr_in)); 266 + sqe->rw_flags = 1; 267 + err = io_uring_submit(&ring); 268 + if (err < 0) 269 + goto fail; 270 + 271 + err = io_uring_wait_cqe(&ring, &cqe); 272 + if (err) 273 + goto fail; 274 + if (cqe->res != -EINVAL) 275 + goto fail; 276 + io_uring_cqe_seen(&ring, cqe); 277 + 278 + ret = T_EXIT_PASS; 279 + 280 + fail: 281 + io_uring_queue_exit(&ring); 282 + if (sock != -1) 283 + close(sock); 284 + return ret; 285 + } 286 + 287 + static int test_bad_listen(void) 288 + { 289 + struct sockaddr_in server_addr; 290 + struct io_uring_sqe *sqe; 291 + struct io_uring_cqe *cqe; 292 + struct io_uring ring; 293 + int sock = -1, err; 294 + int ret = T_EXIT_FAIL; 295 + 296 + memset(&server_addr, 0, sizeof(struct sockaddr_in)); 297 + server_addr.sin_family = AF_INET; 298 + server_addr.sin_port = htons(8001); 299 + server_addr.sin_addr.s_addr = htons(INADDR_ANY); 300 + 301 + err = t_create_ring(1, &ring, 0); 302 + if (err < 0) { 303 + fprintf(stderr, "queue_init: %d\n", err); 304 + return T_SETUP_SKIP; 305 + } 306 + 307 + sock = socket(AF_INET, SOCK_STREAM, 0); 308 + if (sock < 0) { 309 + perror("socket"); 310 + goto fail; 311 + } 312 + 313 + err = t_bind_ephemeral_port(sock, &server_addr); 314 + if (err) { 315 + fprintf(stderr, "bind: %s\n", strerror(-err)); 316 + goto fail; 317 + } 318 + 319 + /* listen on bad sock */ 320 + sqe = io_uring_get_sqe(&ring); 321 + io_uring_prep_listen(sqe, 0, 1); 322 + err = io_uring_submit(&ring); 323 + if (err < 0) 324 + goto fail; 325 + 326 + err = io_uring_wait_cqe(&ring, &cqe); 327 + if (err) 328 + goto fail; 329 + 330 + if (cqe->res != -ENOTSOCK) 331 + goto fail; 332 + io_uring_cqe_seen(&ring, cqe); 333 + 334 + /* listen with weird parameters */ 335 + sqe = io_uring_get_sqe(&ring); 336 + io_uring_prep_listen(sqe, sock, 1); 337 + sqe->addr2 = 0xffffff; 338 + err = io_uring_submit(&ring); 339 + if (err < 0) 340 + goto fail; 341 + 
342 + err = io_uring_wait_cqe(&ring, &cqe); 343 + if (err) 344 + goto fail; 345 + 346 + if (cqe->res != -EINVAL) 347 + goto fail; 348 + io_uring_cqe_seen(&ring, cqe); 349 + 350 + ret = T_EXIT_PASS; 351 + fail: 352 + io_uring_queue_exit(&ring); 353 + if (sock != -1) 354 + close(sock); 355 + return ret; 356 + } 357 + 358 + int main(int argc, char *argv[]) 359 + { 360 + struct io_uring_probe *probe; 361 + int ret; 362 + 363 + if (argc > 1) 364 + return 0; 365 + 366 + /* 367 + * This test is not supported on older kernels. Check for 368 + * OP_LISTEN, since that is the last feature required to support 369 + * it. 370 + */ 371 + probe = io_uring_get_probe(); 372 + if (!probe) 373 + return T_EXIT_SKIP; 374 + if (!io_uring_opcode_supported(probe, IORING_OP_LISTEN)) 375 + return T_EXIT_SKIP; 376 + 377 + ret = test_good_server(0); 378 + if (ret) { 379 + fprintf(stderr, "good 0 failed\n"); 380 + return T_EXIT_FAIL; 381 + } 382 + 383 + ret = test_good_server(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN); 384 + if (ret) { 385 + fprintf(stderr, "good defer failed\n"); 386 + return T_EXIT_FAIL; 387 + } 388 + 389 + ret = test_good_server(IORING_SETUP_SQPOLL); 390 + if (ret) { 391 + fprintf(stderr, "good sqpoll failed\n"); 392 + return T_EXIT_FAIL; 393 + } 394 + 395 + ret = test_bad_bind(); 396 + if (ret) { 397 + fprintf(stderr, "bad bind failed\n"); 398 + return T_EXIT_FAIL; 399 + } 400 + 401 + ret = test_bad_listen(); 402 + if (ret) { 403 + fprintf(stderr, "bad listen failed\n"); 404 + return T_EXIT_FAIL; 405 + } 406 + 407 + return T_EXIT_PASS; 408 + }

+123

vendor/liburing/test/buf-ring-nommap.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: test IOU_PBUF_RING_MMAP with a ring setup with a ring 4 + * setup without mmap'ing sq/cq arrays 5 + * 6 + */ 7 + #include <stdio.h> 8 + #include <stdlib.h> 9 + #include <unistd.h> 10 + #include <fcntl.h> 11 + #include <string.h> 12 + #include <sys/mman.h> 13 + 14 + #include "liburing.h" 15 + #include "helpers.h" 16 + 17 + static int bgid = 5; 18 + static int bid = 89; 19 + 20 + int main(int argc, char *argv[]) 21 + { 22 + struct io_uring_buf_ring *br; 23 + struct io_uring_sqe *sqe; 24 + struct io_uring_cqe *cqe; 25 + struct io_uring ring; 26 + size_t ring_size; 27 + int ret, ring_mask, fds[2]; 28 + struct io_uring_buf_reg reg = { 29 + .ring_entries = 1, 30 + .bgid = bgid, 31 + .flags = IOU_PBUF_RING_MMAP, 32 + }; 33 + struct io_uring_params p = { }; 34 + void *ring_mem; 35 + char buf[32]; 36 + off_t off; 37 + 38 + if (argc > 1) 39 + return T_EXIT_SKIP; 40 + 41 + if (posix_memalign(&ring_mem, 16384, 16384)) 42 + return T_EXIT_FAIL; 43 + 44 + memset(ring_mem, 0, 16384); 45 + 46 + p.flags = IORING_SETUP_NO_MMAP; 47 + ret = io_uring_queue_init_mem(1, &ring, &p, ring_mem, 16384); 48 + if (ret < 0) { 49 + if (ret == -EINVAL || ret == -ENOMEM) 50 + return T_EXIT_SKIP; 51 + fprintf(stderr, "queue init failed %d\n", ret); 52 + return T_EXIT_FAIL; 53 + } 54 + 55 + if (pipe(fds) < 0) { 56 + perror("pipe"); 57 + return T_EXIT_FAIL; 58 + } 59 + 60 + ring_size = sizeof(struct io_uring_buf); 61 + ring_mask = io_uring_buf_ring_mask(1); 62 + 63 + ret = io_uring_register_buf_ring(&ring, &reg, 0); 64 + if (ret) { 65 + if (ret == -EINVAL) 66 + return T_EXIT_SKIP; 67 + fprintf(stderr, "reg buf ring: %d\n", ret); 68 + return T_EXIT_FAIL; 69 + } 70 + 71 + off = IORING_OFF_PBUF_RING | 72 + (unsigned long long) bgid << IORING_OFF_PBUF_SHIFT; 73 + br = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, 74 + MAP_SHARED | MAP_POPULATE, ring.ring_fd, off); 75 + if (br == MAP_FAILED) { 76 + if (errno == ENOMEM) 77 + return 
T_EXIT_SKIP; 78 + perror("mmap"); 79 + return T_EXIT_FAIL; 80 + } 81 + 82 + io_uring_buf_ring_add(br, buf, sizeof(buf), bid, ring_mask, 0); 83 + io_uring_buf_ring_advance(br, 1); 84 + 85 + sqe = io_uring_get_sqe(&ring); 86 + io_uring_prep_read(sqe, fds[0], NULL, 0, 0); 87 + sqe->flags |= IOSQE_BUFFER_SELECT; 88 + sqe->buf_group = bgid; 89 + 90 + io_uring_submit(&ring); 91 + 92 + ret = write(fds[1], "Hello", 5); 93 + if (ret < 0) { 94 + perror("write"); 95 + return T_EXIT_FAIL; 96 + } else if (ret != 5) { 97 + fprintf(stderr, "short write %d\n", ret); 98 + return T_EXIT_FAIL; 99 + } 100 + 101 + ret = io_uring_wait_cqe(&ring, &cqe); 102 + if (ret) { 103 + fprintf(stderr, "wait %d\n", ret); 104 + return T_EXIT_FAIL; 105 + } 106 + if (cqe->res < 0) { 107 + fprintf(stderr, "cqe res %d\n", cqe->res); 108 + return T_EXIT_FAIL; 109 + } 110 + if (!(cqe->flags & IORING_CQE_F_BUFFER)) { 111 + fprintf(stderr, "buffer not selected in cqe\n"); 112 + return T_EXIT_FAIL; 113 + } 114 + if ((cqe->flags >> IORING_CQE_BUFFER_SHIFT) != bid) { 115 + fprintf(stderr, "wrong buffer id returned\n"); 116 + return T_EXIT_FAIL; 117 + } 118 + 119 + io_uring_cqe_seen(&ring, cqe); 120 + 121 + io_uring_queue_exit(&ring); 122 + return T_EXIT_PASS; 123 + }

+83

vendor/liburing/test/buf-ring-put.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: test persistence of mmap'ed provided ring buffers. Use a range 4 + * of buffer group IDs that puts us into both the lower end array 5 + * and higher end xarry. 6 + * 7 + */ 8 + #include <stdio.h> 9 + #include <stdlib.h> 10 + #include <unistd.h> 11 + #include <fcntl.h> 12 + #include <string.h> 13 + #include <sys/mman.h> 14 + 15 + #include "liburing.h" 16 + #include "helpers.h" 17 + 18 + #define BGID_START 60 19 + #define BGID_NR 10 20 + #define ENTRIES 512 21 + 22 + int main(int argc, char *argv[]) 23 + { 24 + struct io_uring_buf_ring *br[BGID_NR]; 25 + struct io_uring ring; 26 + size_t ring_size; 27 + int ret, i, j; 28 + 29 + if (argc > 1) 30 + return T_EXIT_SKIP; 31 + 32 + ret = io_uring_queue_init(1, &ring, 0); 33 + if (ret) { 34 + fprintf(stderr, "queue init failed %d\n", ret); 35 + return T_EXIT_FAIL; 36 + } 37 + 38 + ring_size = ENTRIES * sizeof(struct io_uring_buf); 39 + 40 + for (i = 0; i < BGID_NR; i++) { 41 + int bgid = BGID_START + i; 42 + struct io_uring_buf_reg reg = { 43 + .ring_entries = ENTRIES, 44 + .bgid = bgid, 45 + .flags = IOU_PBUF_RING_MMAP, 46 + }; 47 + off_t off; 48 + 49 + ret = io_uring_register_buf_ring(&ring, &reg, 0); 50 + if (ret) { 51 + if (ret == -EINVAL) 52 + return T_EXIT_SKIP; 53 + fprintf(stderr, "reg buf ring: %d\n", ret); 54 + return T_EXIT_FAIL; 55 + } 56 + 57 + off = IORING_OFF_PBUF_RING | 58 + (unsigned long long) bgid << IORING_OFF_PBUF_SHIFT; 59 + br[i] = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, 60 + MAP_SHARED | MAP_POPULATE, ring.ring_fd, off); 61 + if (br[i] == MAP_FAILED) { 62 + perror("mmap"); 63 + return T_EXIT_FAIL; 64 + } 65 + } 66 + 67 + for (i = 0; i < BGID_NR; i++) { 68 + ret = io_uring_unregister_buf_ring(&ring, BGID_START + i); 69 + if (ret) { 70 + fprintf(stderr, "reg buf ring: %d\n", ret); 71 + return T_EXIT_FAIL; 72 + } 73 + } 74 + 75 + for (j = 0; j < 1000; j++) { 76 + for (i = 0; i < BGID_NR; i++) 77 + memset(br[i], 0x5a, ring_size); 
78 + usleep(1000); 79 + } 80 + 81 + io_uring_queue_exit(&ring); 82 + return T_EXIT_PASS; 83 + }

+57 -18

vendor/liburing/test/buf-ring.c

··· 292 292 return cqe->flags >> 16; 293 293 } 294 294 295 - static int test_running(int bgid, int entries, int loops) 295 + static int test_running(int bgid, int entries, int loops, int use_mmap) 296 296 { 297 297 int ring_mask = io_uring_buf_ring_mask(entries); 298 298 struct io_uring_buf_ring *br; ··· 303 303 304 304 ret = t_create_ring(1, &ring, 0); 305 305 if (ret == T_SETUP_SKIP) 306 - return 0; 306 + return T_EXIT_SKIP; 307 307 else if (ret != T_SETUP_OK) 308 - return 1; 308 + return T_EXIT_FAIL; 309 309 310 - br = io_uring_setup_buf_ring(&ring, entries, bgid, 0, &ret); 311 - if (!br) { 312 - /* by now should have checked if this is supported or not */ 313 - fprintf(stderr, "Buffer ring register failed %d\n", ret); 314 - return 1; 310 + if (!use_mmap) { 311 + br = io_uring_setup_buf_ring(&ring, entries, bgid, 0, &ret); 312 + if (!br) { 313 + /* by now should have checked if this is supported or not */ 314 + fprintf(stderr, "Buffer ring register failed %d\n", ret); 315 + return T_EXIT_FAIL; 316 + } 317 + } else { 318 + struct io_uring_buf_reg reg = { 319 + .ring_entries = entries, 320 + .bgid = bgid, 321 + .flags = IOU_PBUF_RING_MMAP, 322 + }; 323 + size_t ring_size; 324 + off_t off; 325 + 326 + ret = io_uring_register_buf_ring(&ring, &reg, 0); 327 + if (ret) { 328 + if (ret == -EINVAL) 329 + return T_EXIT_SKIP; 330 + fprintf(stderr, "mmap ring register failed %d\n", ret); 331 + return T_EXIT_FAIL; 332 + } 333 + 334 + off = IORING_OFF_PBUF_RING | 335 + (unsigned long long) bgid << IORING_OFF_PBUF_SHIFT; 336 + ring_size = sizeof(struct io_uring_buf) * entries; 337 + br = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, 338 + MAP_SHARED | MAP_POPULATE, ring.ring_fd, off); 339 + if (br == MAP_FAILED) { 340 + perror("mmap"); 341 + return T_EXIT_FAIL; 342 + } 315 343 } 316 344 317 345 buffers = malloc(sizeof(bool) * entries); 318 346 if (!buffers) 319 - return 1; 347 + return T_EXIT_SKIP; 320 348 321 349 read_fd = open("/dev/zero", O_RDONLY); 322 350 if (read_fd < 0) 
323 - return 1; 351 + return T_EXIT_SKIP; 324 352 325 353 for (loop = 0; loop < loops; loop++) { 326 354 memset(buffers, 0, sizeof(bool) * entries); ··· 333 361 ret = test_one_read(read_fd, bgid, &ring); 334 362 if (ret < 0) { 335 363 fprintf(stderr, "bad run %d/%d = %d\n", loop, idx, ret); 336 - return ret; 364 + return T_EXIT_FAIL; 337 365 } 338 366 if (buffers[ret]) { 339 367 fprintf(stderr, "reused buffer %d/%d = %d!\n", loop, idx, ret); 340 - return 1; 368 + return T_EXIT_FAIL; 341 369 } 342 370 if (buffer[0] != 0) { 343 371 fprintf(stderr, "unexpected read %d %d/%d = %d!\n", 344 372 (int)buffer[0], loop, idx, ret); 345 - return 1; 373 + return T_EXIT_FAIL; 346 374 } 347 375 if (buffer[1] != 1) { 348 376 fprintf(stderr, "unexpected spilled read %d %d/%d = %d!\n", 349 377 (int)buffer[1], loop, idx, ret); 350 - return 1; 378 + return T_EXIT_FAIL; 351 379 } 352 380 buffers[ret] = true; 353 381 } 354 382 ret = test_one_read(read_fd, bgid, &ring); 355 383 if (ret != -ENOBUFS) { 356 384 fprintf(stderr, "expected enobufs run %d = %d\n", loop, ret); 357 - return 1; 385 + return T_EXIT_FAIL; 358 386 } 359 387 360 388 } ··· 362 390 ret = io_uring_unregister_buf_ring(&ring, bgid); 363 391 if (ret) { 364 392 fprintf(stderr, "Buffer ring register failed %d\n", ret); 365 - return 1; 393 + return T_EXIT_FAIL; 366 394 } 367 395 368 396 close(read_fd); 369 397 io_uring_queue_exit(&ring); 370 398 free(buffers); 371 - return 0; 399 + return T_EXIT_PASS; 372 400 } 373 401 374 402 int main(int argc, char *argv[]) ··· 423 451 } 424 452 425 453 for (i = 0; !no_buf_ring && entries[i] != -1; i++) { 426 - ret = test_running(2, entries[i], 3); 454 + ret = test_running(2, entries[i], 3, 0); 427 455 if (ret) { 428 456 fprintf(stderr, "test_running(%d) failed\n", entries[i]); 429 457 return T_EXIT_FAIL; 430 458 } 431 459 } 460 + 461 + for (i = 0; !no_buf_ring && entries[i] != -1; i++) { 462 + ret = test_running(2, entries[i], 3, 1); 463 + if (ret == T_EXIT_SKIP) { 464 + break; 465 + } else 
if (ret != T_EXIT_PASS) { 466 + fprintf(stderr, "test_running(%d) mmap failed\n", entries[i]); 467 + return T_EXIT_FAIL; 468 + } 469 + } 470 + 432 471 433 472 return T_EXIT_PASS; 434 473 }

+4

vendor/liburing/test/config

··· 1 1 # Copy this to config.local, uncomment and define values 2 2 # 3 + # NOTE: any files or devices added here will be used by tests that take 4 + # file or device arguments. This includes tests that are destructive with 5 + # respect to data contents. They may get erased or overwritten as part of tests. 6 + # 3 7 # Define tests to exclude from running 4 8 # TEST_EXCLUDE="" 5 9 #

+46 -11

vendor/liburing/test/connect.c

··· 133 133 return ret; 134 134 } 135 135 136 - static int connect_socket(struct io_uring *ring, int fd, int *code) 136 + static int connect_socket(struct io_uring *ring, int fd, int *code, int async) 137 137 { 138 138 struct sockaddr_in addr; 139 139 int ret, res; ··· 150 150 } 151 151 152 152 io_uring_prep_connect(sqe, fd, (struct sockaddr*)&addr, sizeof(addr)); 153 + if (async) 154 + sqe->flags |= IOSQE_ASYNC; 153 155 sqe->user_data = 1; 154 156 155 157 ret = submit_and_wait(ring, &res); ··· 186 188 if (connect_fd == -1) 187 189 return -1; 188 190 189 - ret = connect_socket(ring, connect_fd, &code); 191 + ret = connect_socket(ring, connect_fd, &code, 0); 190 192 if (ret == -1) 191 193 goto err; 192 194 ··· 209 211 return -1; 210 212 } 211 213 212 - static int test_connect(struct io_uring *ring) 214 + static int test_connect(struct io_uring *ring, int async) 213 215 { 214 216 int accept_fd; 215 217 int connect_fd; ··· 227 229 if (connect_fd == -1) 228 230 goto err1; 229 231 230 - ret = connect_socket(ring, connect_fd, &code); 232 + ret = connect_socket(ring, connect_fd, &code, async); 231 233 if (ret == -1) 232 234 goto err2; 233 235 ··· 296 298 } 297 299 298 300 // We first connect with one client socket in order to fill the accept queue. 
299 - ret = connect_socket(ring, connect_fd[0], &code); 301 + ret = connect_socket(ring, connect_fd[0], &code, 0); 300 302 if (ret == -1 || code != 0) { 301 303 fprintf(stderr, "unable to connect\n"); 302 304 goto err; ··· 363 365 return -1; 364 366 } 365 367 366 - int main(int argc, char *argv[]) 368 + static int test(int flags) 367 369 { 368 370 struct io_uring ring; 369 371 int ret; 370 372 371 - if (argc > 1) 372 - return T_EXIT_SKIP; 373 - 374 - ret = io_uring_queue_init(8, &ring, 0); 373 + ret = io_uring_queue_init(8, &ring, flags); 375 374 if (ret) { 376 375 fprintf(stderr, "io_uring_queue_setup() = %d\n", ret); 377 376 return T_EXIT_FAIL; ··· 390 389 if (no_connect) 391 390 return T_EXIT_SKIP; 392 391 393 - ret = test_connect(&ring); 392 + ret = test_connect(&ring, 0); 393 + if (ret == -1) { 394 + fprintf(stderr, "test_connect(): failed\n"); 395 + return T_EXIT_FAIL; 396 + } 397 + 398 + ret = test_connect(&ring, 1); 394 399 if (ret == -1) { 395 400 fprintf(stderr, "test_connect(): failed\n"); 396 401 return T_EXIT_FAIL; ··· 405 410 io_uring_queue_exit(&ring); 406 411 return T_EXIT_PASS; 407 412 } 413 + 414 + int main(int argc, char *argv[]) 415 + { 416 + int ret; 417 + 418 + if (argc > 1) 419 + return T_EXIT_SKIP; 420 + 421 + ret = test(0); 422 + if (ret == -1) { 423 + fprintf(stderr, "test 0 failed\n"); 424 + return T_EXIT_FAIL; 425 + } 426 + if (no_connect) 427 + return T_EXIT_SKIP; 428 + 429 + ret = test(IORING_SETUP_SQPOLL); 430 + if (ret == -1) { 431 + fprintf(stderr, "test SQPOLL failed\n"); 432 + return T_EXIT_FAIL; 433 + } 434 + 435 + ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN); 436 + if (ret == -1) { 437 + fprintf(stderr, "test DEFER failed\n"); 438 + return T_EXIT_FAIL; 439 + } 440 + 441 + return T_EXIT_PASS; 442 + }

+60

vendor/liburing/test/coredump.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: trigger segfault. A recent 6.4-rc kernel introduced a bug 4 + * via vhost where segfaults for applications using io_uring 5 + * would hang in D state forever upon trying to generate the 6 + * core file. Perform a trivial test where a child process 7 + * generates a NULL pointer dereference and ensure that we don't 8 + * hang. 9 + * 10 + */ 11 + #include <stdio.h> 12 + #include <stdlib.h> 13 + #include <unistd.h> 14 + #include <sys/wait.h> 15 + 16 + #include "liburing.h" 17 + #include "helpers.h" 18 + 19 + static void test(void) 20 + { 21 + struct io_uring_sqe *sqe; 22 + struct io_uring ring; 23 + int *ptr = NULL; 24 + int fds[2]; 25 + char r1; 26 + 27 + if (pipe(fds) < 0) { 28 + perror("pipe"); 29 + exit(0); 30 + } 31 + 32 + io_uring_queue_init(8, &ring, 0); 33 + 34 + sqe = io_uring_get_sqe(&ring); 35 + io_uring_prep_read(sqe, fds[0], &r1, sizeof(r1), 0); 36 + sqe->flags = IOSQE_ASYNC; 37 + sqe->user_data = 1; 38 + 39 + io_uring_submit(&ring); 40 + *ptr = 0; 41 + exit(0); 42 + } 43 + 44 + int main(int argc, char *argv[]) 45 + { 46 + pid_t pid; 47 + int wstat; 48 + 49 + pid = fork(); 50 + if (pid < 0) { 51 + perror("fork"); 52 + return T_EXIT_SKIP; 53 + } else if (!pid) { 54 + test(); 55 + } 56 + 57 + wait(&wstat); 58 + unlink("core"); 59 + return T_EXIT_PASS; 60 + }

+173

vendor/liburing/test/defer-tw-timeout.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: test waiting for more events than what will be posted with 4 + * a timeout with DEFER_TASKRUN. All kernels should time out, 5 + * but a non-buggy kernel will end up with one CQE available 6 + * for reaping. Buggy kernels will not have processed the 7 + * task_work and will have 0 events. 8 + * 9 + */ 10 + #include <errno.h> 11 + #include <stdio.h> 12 + #include <unistd.h> 13 + #include <stdlib.h> 14 + #include <string.h> 15 + #include <pthread.h> 16 + 17 + #include "liburing.h" 18 + #include "helpers.h" 19 + 20 + struct d { 21 + int fd; 22 + }; 23 + 24 + static void *thread_fn(void *data) 25 + { 26 + struct d *d = data; 27 + int ret; 28 + 29 + usleep(100000); 30 + ret = write(d->fd, "Hello", 5); 31 + if (ret < 0) 32 + perror("write"); 33 + return NULL; 34 + } 35 + 36 + static int test_poll(struct io_uring *ring) 37 + { 38 + struct io_uring_cqe *cqe; 39 + struct io_uring_sqe *sqe; 40 + struct __kernel_timespec ts; 41 + int ret, fds[2], i; 42 + pthread_t thread; 43 + char buf[32]; 44 + struct d d; 45 + void *tret; 46 + 47 + if (pipe(fds) < 0) { 48 + perror("pipe"); 49 + return 1; 50 + } 51 + d.fd = fds[1]; 52 + 53 + sqe = io_uring_get_sqe(ring); 54 + io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0); 55 + 56 + pthread_create(&thread, NULL, thread_fn, &d); 57 + 58 + ts.tv_sec = 1; 59 + ts.tv_nsec = 0; 60 + 61 + ret = io_uring_submit_and_wait_timeout(ring, &cqe, 2, &ts, NULL); 62 + if (ret != 1) { 63 + fprintf(stderr, "unexpected wait ret %d\n", ret); 64 + return T_EXIT_FAIL; 65 + } 66 + 67 + for (i = 0; i < 2; i++) { 68 + ret = io_uring_peek_cqe(ring, &cqe); 69 + if (ret) 70 + break; 71 + io_uring_cqe_seen(ring, cqe); 72 + } 73 + 74 + if (i != 1) { 75 + fprintf(stderr, "Got %d request, expected 1\n", i); 76 + return T_EXIT_FAIL; 77 + } 78 + 79 + pthread_join(thread, &tret); 80 + return T_EXIT_PASS; 81 + } 82 + 83 + static int test_file(struct io_uring *ring, char *__fname) 84 + { 85 + struct 
io_uring_cqe *cqe; 86 + struct io_uring_sqe *sqe; 87 + struct __kernel_timespec ts; 88 + char filename[64], *fname; 89 + int fd, ret, i; 90 + void *buf; 91 + 92 + if (!__fname) { 93 + fname = filename; 94 + sprintf(fname, ".defer-tw-timeout.%d", getpid()); 95 + t_create_file(fname, 128*1024); 96 + } else { 97 + fname = __fname; 98 + } 99 + 100 + fd = open(fname, O_RDONLY | O_DIRECT); 101 + if (fd < 0) { 102 + if (errno == EINVAL) { 103 + if (!__fname) 104 + unlink(fname); 105 + return T_EXIT_SKIP; 106 + } 107 + perror("open"); 108 + if (!__fname) 109 + unlink(fname); 110 + return T_EXIT_FAIL; 111 + } 112 + 113 + if (!__fname) 114 + unlink(fname); 115 + 116 + if (posix_memalign(&buf, 4096, 4096)) { 117 + close(fd); 118 + return T_EXIT_FAIL; 119 + } 120 + 121 + sqe = io_uring_get_sqe(ring); 122 + io_uring_prep_read(sqe, fd, buf, 4096, 0); 123 + 124 + ts.tv_sec = 1; 125 + ts.tv_nsec = 0; 126 + 127 + ret = io_uring_submit_and_wait_timeout(ring, &cqe, 2, &ts, NULL); 128 + if (ret != 1) { 129 + fprintf(stderr, "unexpected wait ret %d\n", ret); 130 + close(fd); 131 + return T_EXIT_FAIL; 132 + } 133 + 134 + for (i = 0; i < 2; i++) { 135 + ret = io_uring_peek_cqe(ring, &cqe); 136 + if (ret) 137 + break; 138 + io_uring_cqe_seen(ring, cqe); 139 + } 140 + 141 + if (i != 1) { 142 + fprintf(stderr, "Got %d request, expected 1\n", i); 143 + close(fd); 144 + return T_EXIT_FAIL; 145 + } 146 + 147 + close(fd); 148 + return T_EXIT_PASS; 149 + } 150 + 151 + int main(int argc, char *argv[]) 152 + { 153 + struct io_uring ring; 154 + char *fname = NULL; 155 + int ret; 156 + 157 + ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN); 158 + if (ret == -EINVAL) 159 + return T_EXIT_SKIP; 160 + 161 + if (argc > 1) 162 + fname = argv[1]; 163 + 164 + ret = test_file(&ring, fname); 165 + if (ret != T_EXIT_PASS) 166 + return ret; 167 + 168 + ret = test_poll(&ring); 169 + if (ret != T_EXIT_PASS) 170 + return ret; 171 + 172 + return T_EXIT_PASS; 173 + }

+3 -3

vendor/liburing/test/defer.c

··· 88 88 return 0; 89 89 } 90 90 91 - static int test_cancelled_userdata(struct io_uring *ring) 91 + static int test_canceled_userdata(struct io_uring *ring) 92 92 { 93 93 struct test_context ctx; 94 94 int ret, i, nr = 100; ··· 276 276 } 277 277 278 278 279 - ret = test_cancelled_userdata(&poll_ring); 279 + ret = test_canceled_userdata(&poll_ring); 280 280 if (ret) { 281 - printf("test_cancelled_userdata failed\n"); 281 + printf("test_canceled_userdata failed\n"); 282 282 return ret; 283 283 } 284 284

+1 -1

vendor/liburing/test/eventfd-reg.c

··· 43 43 return T_EXIT_FAIL; 44 44 } 45 45 46 - /* Check that registrering again will get -EBUSY */ 46 + /* Check that registering again will get -EBUSY */ 47 47 ret = io_uring_register_eventfd(&ring, evfd[1]); 48 48 if (ret != -EBUSY) { 49 49 fprintf(stderr, "unexpected 2nd register: %d\n", ret);

+500

vendor/liburing/test/fd-install.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: test installing a direct descriptor into the regular 4 + * file table 5 + * 6 + */ 7 + #include <errno.h> 8 + #include <stdio.h> 9 + #include <unistd.h> 10 + #include <stdlib.h> 11 + #include <string.h> 12 + #include <fcntl.h> 13 + 14 + #include "liburing.h" 15 + #include "helpers.h" 16 + 17 + static int no_fd_install; 18 + 19 + /* test that O_CLOEXEC is accepted, and others are not */ 20 + static int test_flags(struct io_uring *ring, int async) 21 + { 22 + struct io_uring_sqe *sqe; 23 + struct io_uring_cqe *cqe; 24 + int ret, fds[2], fd; 25 + 26 + if (pipe(fds) < 0) { 27 + perror("pipe"); 28 + return T_EXIT_FAIL; 29 + } 30 + 31 + ret = io_uring_register_files(ring, &fds[0], 1); 32 + if (ret) { 33 + fprintf(stderr, "failed register files %d\n", ret); 34 + return T_EXIT_FAIL; 35 + } 36 + 37 + /* check that setting an invalid flag fails */ 38 + sqe = io_uring_get_sqe(ring); 39 + io_uring_prep_fixed_fd_install(sqe, 0, 1U << 17); 40 + io_uring_submit(ring); 41 + 42 + ret = io_uring_wait_cqe(ring, &cqe); 43 + if (ret) { 44 + fprintf(stderr, "wait cqe %d\n", ret); 45 + return T_EXIT_FAIL; 46 + } 47 + if (cqe->res != -EINVAL) { 48 + fprintf(stderr, "unexpected cqe res %d\n", cqe->res); 49 + return T_EXIT_FAIL; 50 + } 51 + io_uring_cqe_seen(ring, cqe); 52 + 53 + /* check that IORING_FIXED_FD_NO_CLOEXEC is accepted */ 54 + sqe = io_uring_get_sqe(ring); 55 + io_uring_prep_fixed_fd_install(sqe, 0, IORING_FIXED_FD_NO_CLOEXEC); 56 + if (async) 57 + sqe->flags |= IOSQE_ASYNC; 58 + io_uring_submit(ring); 59 + 60 + ret = io_uring_wait_cqe(ring, &cqe); 61 + if (ret) { 62 + fprintf(stderr, "wait cqe %d\n", ret); 63 + return T_EXIT_FAIL; 64 + } 65 + if (cqe->res < 0) { 66 + fprintf(stderr, "unexpected cqe res %d\n", cqe->res); 67 + return T_EXIT_FAIL; 68 + } 69 + fd = cqe->res; 70 + io_uring_cqe_seen(ring, cqe); 71 + 72 + close(fds[0]); 73 + close(fds[1]); 74 + close(fd); 75 + io_uring_unregister_files(ring); 76 + 
77 + return T_EXIT_PASS; 78 + } 79 + 80 + static int test_linked(struct io_uring *ring) 81 + { 82 + struct io_uring_sqe *sqe; 83 + struct io_uring_cqe *cqe; 84 + int ret, fds[2], fd, i; 85 + 86 + if (pipe(fds) < 0) { 87 + perror("pipe"); 88 + return T_EXIT_FAIL; 89 + } 90 + 91 + ret = io_uring_register_files(ring, &fds[0], 1); 92 + if (ret) { 93 + fprintf(stderr, "failed register files %d\n", ret); 94 + return T_EXIT_FAIL; 95 + } 96 + 97 + sqe = io_uring_get_sqe(ring); 98 + io_uring_prep_nop(sqe); 99 + sqe->flags |= IOSQE_IO_LINK; 100 + sqe->user_data = 1; 101 + 102 + sqe = io_uring_get_sqe(ring); 103 + io_uring_prep_fixed_fd_install(sqe, 0, 0); 104 + sqe->user_data = 2; 105 + 106 + ret = io_uring_submit(ring); 107 + if (ret != 2) { 108 + fprintf(stderr, "submit: %d\n", ret); 109 + return T_EXIT_FAIL; 110 + } 111 + 112 + fd = -1; 113 + for (i = 0; i < 2; i++) { 114 + ret = io_uring_wait_cqe(ring, &cqe); 115 + if (ret) { 116 + fprintf(stderr, "wait cqe %d\n", ret); 117 + return T_EXIT_FAIL; 118 + } 119 + if (cqe->res < 0) { 120 + fprintf(stderr, "unexpected cqe res %d\n", cqe->res); 121 + return T_EXIT_FAIL; 122 + } 123 + if (cqe->user_data == 2) 124 + fd = cqe->res; 125 + io_uring_cqe_seen(ring, cqe); 126 + } 127 + 128 + close(fds[0]); 129 + close(fds[1]); 130 + if (fd != -1) 131 + close(fd); 132 + io_uring_unregister_files(ring); 133 + return T_EXIT_PASS; 134 + } 135 + 136 + /* test not setting IOSQE_FIXED_FILE */ 137 + static int test_not_fixed(struct io_uring *ring) 138 + { 139 + struct io_uring_sqe *sqe; 140 + struct io_uring_cqe *cqe; 141 + int ret, fds[2]; 142 + 143 + if (pipe(fds) < 0) { 144 + perror("pipe"); 145 + return T_EXIT_FAIL; 146 + } 147 + 148 + ret = io_uring_register_files(ring, &fds[0], 1); 149 + if (ret) { 150 + fprintf(stderr, "failed register files %d\n", ret); 151 + return T_EXIT_FAIL; 152 + } 153 + 154 + sqe = io_uring_get_sqe(ring); 155 + io_uring_prep_fixed_fd_install(sqe, 0, 0); 156 + sqe->flags &= ~IOSQE_FIXED_FILE; 157 + 
io_uring_submit(ring); 158 + 159 + ret = io_uring_wait_cqe(ring, &cqe); 160 + if (ret) { 161 + fprintf(stderr, "wait cqe %d\n", ret); 162 + return T_EXIT_FAIL; 163 + } 164 + if (cqe->res != -EBADF) { 165 + fprintf(stderr, "unexpected cqe res %d\n", cqe->res); 166 + return T_EXIT_FAIL; 167 + } 168 + 169 + io_uring_cqe_seen(ring, cqe); 170 + 171 + close(fds[0]); 172 + close(fds[1]); 173 + io_uring_unregister_files(ring); 174 + 175 + return T_EXIT_PASS; 176 + } 177 + 178 + /* test invalid direct descriptor indexes */ 179 + static int test_bad_fd(struct io_uring *ring, int some_fd) 180 + { 181 + struct io_uring_sqe *sqe; 182 + struct io_uring_cqe *cqe; 183 + int ret; 184 + 185 + sqe = io_uring_get_sqe(ring); 186 + io_uring_prep_fixed_fd_install(sqe, some_fd, 0); 187 + io_uring_submit(ring); 188 + 189 + ret = io_uring_wait_cqe(ring, &cqe); 190 + if (ret) { 191 + fprintf(stderr, "wait cqe %d\n", ret); 192 + return T_EXIT_FAIL; 193 + } 194 + if (cqe->res != -EBADF) { 195 + fprintf(stderr, "unexpected cqe res %d\n", cqe->res); 196 + return T_EXIT_FAIL; 197 + } 198 + 199 + io_uring_cqe_seen(ring, cqe); 200 + return T_EXIT_PASS; 201 + } 202 + 203 + /* test basic functionality of shifting a direct descriptor to a normal file */ 204 + static int test_working(struct io_uring *ring) 205 + { 206 + struct io_uring_sqe *sqe; 207 + struct io_uring_cqe *cqe; 208 + int ret, fds[2]; 209 + char buf[32]; 210 + 211 + if (pipe(fds) < 0) { 212 + perror("pipe"); 213 + return T_EXIT_FAIL; 214 + } 215 + 216 + /* register read side */ 217 + ret = io_uring_register_files(ring, &fds[0], 1); 218 + if (ret) { 219 + fprintf(stderr, "failed register files %d\n", ret); 220 + return T_EXIT_FAIL; 221 + } 222 + 223 + /* close normal descriptor */ 224 + close(fds[0]); 225 + 226 + /* normal read should fail */ 227 + ret = read(fds[0], buf, 1); 228 + if (ret != -1) { 229 + fprintf(stderr, "unexpected read ret %d\n", ret); 230 + return T_EXIT_FAIL; 231 + } 232 + if (errno != EBADF) { 233 + fprintf(stderr, 
"unexpected read failure %d\n", errno); 234 + return T_EXIT_FAIL; 235 + } 236 + 237 + /* verify we can read the data */ 238 + sqe = io_uring_get_sqe(ring); 239 + io_uring_prep_read(sqe, 0, buf, sizeof(buf), 0); 240 + sqe->flags |= IOSQE_FIXED_FILE; 241 + io_uring_submit(ring); 242 + 243 + /* put some data in the pipe */ 244 + ret = write(fds[1], "Hello", 5); 245 + if (ret < 0) { 246 + perror("write"); 247 + return T_EXIT_FAIL; 248 + } else if (ret != 5) { 249 + fprintf(stderr, "short write %d\n", ret); 250 + return T_EXIT_FAIL; 251 + } 252 + 253 + ret = io_uring_wait_cqe(ring, &cqe); 254 + if (ret) { 255 + fprintf(stderr, "wait cqe %d\n", ret); 256 + return T_EXIT_FAIL; 257 + } 258 + if (cqe->res != 5) { 259 + fprintf(stderr, "weird pipe read ret %d\n", cqe->res); 260 + return T_EXIT_FAIL; 261 + } 262 + io_uring_cqe_seen(ring, cqe); 263 + 264 + /* fixed pipe read worked, now re-install as a regular fd */ 265 + sqe = io_uring_get_sqe(ring); 266 + io_uring_prep_fixed_fd_install(sqe, 0, 0); 267 + io_uring_submit(ring); 268 + 269 + ret = io_uring_wait_cqe(ring, &cqe); 270 + if (ret) { 271 + fprintf(stderr, "wait cqe %d\n", ret); 272 + return T_EXIT_FAIL; 273 + } 274 + if (cqe->res == -EINVAL) { 275 + no_fd_install = 1; 276 + return T_EXIT_SKIP; 277 + } 278 + if (cqe->res < 0) { 279 + fprintf(stderr, "failed install fd: %d\n", cqe->res); 280 + return T_EXIT_FAIL; 281 + } 282 + /* stash new pipe read side fd in old spot */ 283 + fds[0] = cqe->res; 284 + io_uring_cqe_seen(ring, cqe); 285 + 286 + ret = write(fds[1], "Hello", 5); 287 + if (ret < 0) { 288 + perror("write"); 289 + return T_EXIT_FAIL; 290 + } else if (ret != 5) { 291 + fprintf(stderr, "short write %d\n", ret); 292 + return T_EXIT_FAIL; 293 + } 294 + 295 + /* normal pipe read should now work with new fd */ 296 + ret = read(fds[0], buf, sizeof(buf)); 297 + if (ret != 5) { 298 + fprintf(stderr, "unexpected read ret %d\n", ret); 299 + return T_EXIT_FAIL; 300 + } 301 + 302 + /* close fixed file */ 303 + sqe = 
io_uring_get_sqe(ring); 304 + io_uring_prep_close_direct(sqe, 0); 305 + io_uring_submit(ring); 306 + 307 + ret = io_uring_wait_cqe(ring, &cqe); 308 + if (ret) { 309 + fprintf(stderr, "wait cqe %d\n", ret); 310 + return T_EXIT_FAIL; 311 + } 312 + if (cqe->res) { 313 + fprintf(stderr, "close fixed fd %d\n", cqe->res); 314 + return T_EXIT_FAIL; 315 + } 316 + io_uring_cqe_seen(ring, cqe); 317 + 318 + ret = write(fds[1], "Hello", 5); 319 + if (ret < 0) { 320 + perror("write"); 321 + return T_EXIT_FAIL; 322 + } else if (ret != 5) { 323 + fprintf(stderr, "short write %d\n", ret); 324 + return T_EXIT_FAIL; 325 + } 326 + 327 + /* normal pipe read should still work with new fd */ 328 + ret = read(fds[0], buf, sizeof(buf)); 329 + if (ret != 5) { 330 + fprintf(stderr, "unexpected read ret %d\n", ret); 331 + return T_EXIT_FAIL; 332 + } 333 + 334 + /* fixed fd pipe read should now fail */ 335 + sqe = io_uring_get_sqe(ring); 336 + io_uring_prep_read(sqe, 0, buf, sizeof(buf), 0); 337 + sqe->flags = IOSQE_FIXED_FILE; 338 + io_uring_submit(ring); 339 + 340 + /* put some data in the pipe */ 341 + ret = write(fds[1], "Hello", 5); 342 + if (ret < 0) { 343 + perror("write"); 344 + return T_EXIT_FAIL; 345 + } else if (ret != 5) { 346 + fprintf(stderr, "short write %d\n", ret); 347 + return T_EXIT_FAIL; 348 + } 349 + 350 + ret = io_uring_wait_cqe(ring, &cqe); 351 + if (ret) { 352 + fprintf(stderr, "wait cqe %d\n", ret); 353 + return T_EXIT_FAIL; 354 + } 355 + if (cqe->res != -EBADF) { 356 + fprintf(stderr, "weird pipe read ret %d\n", cqe->res); 357 + return T_EXIT_FAIL; 358 + } 359 + io_uring_cqe_seen(ring, cqe); 360 + 361 + close(fds[0]); 362 + close(fds[1]); 363 + io_uring_unregister_files(ring); 364 + return T_EXIT_PASS; 365 + } 366 + 367 + static int test_creds(struct io_uring *ring, int async) 368 + { 369 + struct io_uring_sqe *sqe; 370 + struct io_uring_cqe *cqe; 371 + int cred_id, ret, fds[2]; 372 + 373 + if (pipe(fds) < 0) { 374 + perror("pipe"); 375 + return T_EXIT_FAIL; 376 + } 
377 + 378 + ret = io_uring_register_files(ring, &fds[0], 1); 379 + if (ret) { 380 + fprintf(stderr, "failed register files %d\n", ret); 381 + return T_EXIT_FAIL; 382 + } 383 + 384 + cred_id = io_uring_register_personality(ring); 385 + if (cred_id < 0) { 386 + fprintf(stderr, "Failed registering creds: %d\n", cred_id); 387 + return T_EXIT_FAIL; 388 + } 389 + 390 + /* check that asking for creds fails */ 391 + sqe = io_uring_get_sqe(ring); 392 + io_uring_prep_fixed_fd_install(sqe, 0, 0); 393 + if (async) 394 + sqe->flags |= IOSQE_ASYNC; 395 + sqe->personality = cred_id; 396 + io_uring_submit(ring); 397 + 398 + ret = io_uring_wait_cqe(ring, &cqe); 399 + if (ret) { 400 + fprintf(stderr, "wait cqe %d\n", ret); 401 + return T_EXIT_FAIL; 402 + } 403 + if (cqe->res > 0) { 404 + fprintf(stderr, "install succeeded with creds\n"); 405 + return T_EXIT_FAIL; 406 + } 407 + if (cqe->res != -EPERM) { 408 + fprintf(stderr, "unexpected cqe res %d\n", cqe->res); 409 + return T_EXIT_FAIL; 410 + } 411 + io_uring_cqe_seen(ring, cqe); 412 + 413 + close(fds[0]); 414 + close(fds[1]); 415 + io_uring_unregister_files(ring); 416 + io_uring_unregister_personality(ring, cred_id); 417 + return T_EXIT_PASS; 418 + } 419 + 420 + int main(int argc, char *argv[]) 421 + { 422 + struct io_uring ring; 423 + int ret; 424 + 425 + if (argc > 1) 426 + return T_EXIT_SKIP; 427 + 428 + ret = io_uring_queue_init(4, &ring, 0); 429 + if (ret) { 430 + fprintf(stderr, "ring setup failed: %d\n", ret); 431 + return T_EXIT_FAIL; 432 + } 433 + 434 + ret = test_working(&ring); 435 + if (ret != T_EXIT_PASS) { 436 + if (ret == T_EXIT_FAIL) 437 + fprintf(stderr, "test_working failed\n"); 438 + return ret; 439 + } 440 + if (no_fd_install) 441 + return T_EXIT_SKIP; 442 + 443 + ret = test_bad_fd(&ring, 0); 444 + if (ret != T_EXIT_PASS) { 445 + if (ret == T_EXIT_FAIL) 446 + fprintf(stderr, "test_bad_fd 0 failed\n"); 447 + return ret; 448 + } 449 + 450 + ret = test_bad_fd(&ring, 500); 451 + if (ret != T_EXIT_PASS) { 452 + if 
(ret == T_EXIT_FAIL) 453 + fprintf(stderr, "test_bad_fd 500 failed\n"); 454 + return ret; 455 + } 456 + 457 + ret = test_not_fixed(&ring); 458 + if (ret != T_EXIT_PASS) { 459 + if (ret == T_EXIT_FAIL) 460 + fprintf(stderr, "test_not_fixed failed\n"); 461 + return ret; 462 + } 463 + 464 + ret = test_flags(&ring, 0); 465 + if (ret != T_EXIT_PASS) { 466 + if (ret == T_EXIT_FAIL) 467 + fprintf(stderr, "test_flags 0 failed\n"); 468 + return ret; 469 + } 470 + 471 + ret = test_flags(&ring, 1); 472 + if (ret != T_EXIT_PASS) { 473 + if (ret == T_EXIT_FAIL) 474 + fprintf(stderr, "test_flags 1 failed\n"); 475 + return ret; 476 + } 477 + 478 + ret = test_creds(&ring, 0); 479 + if (ret != T_EXIT_PASS) { 480 + if (ret == T_EXIT_FAIL) 481 + fprintf(stderr, "test_creds 0 failed\n"); 482 + return ret; 483 + } 484 + 485 + ret = test_creds(&ring, 1); 486 + if (ret != T_EXIT_PASS) { 487 + if (ret == T_EXIT_FAIL) 488 + fprintf(stderr, "test_creds 1 failed\n"); 489 + return ret; 490 + } 491 + 492 + ret = test_linked(&ring); 493 + if (ret != T_EXIT_PASS) { 494 + if (ret == T_EXIT_FAIL) 495 + fprintf(stderr, "test_linked failed\n"); 496 + return ret; 497 + } 498 + 499 + return T_EXIT_PASS; 500 + }

+7 -3

vendor/liburing/test/file-register.c

··· 305 305 files = open_files(100, 100, 0); 306 306 ret = io_uring_register_files(ring, files, 200); 307 307 if (ret) { 308 - if (ret == -EBADF) { 308 + if (ret == -EBADF || ret == -EINVAL) { 309 309 fprintf(stdout, "Sparse files not supported, skipping\n"); 310 310 no_update = 1; 311 311 goto done; ··· 352 352 static int test_basic(struct io_uring *ring, int fail) 353 353 { 354 354 int *files; 355 - int ret; 355 + int ret, i; 356 356 int nr_files = fail ? 10 : 100; 357 357 358 - files = open_files(nr_files, 0, 0); 358 + files = open_files(nr_files, fail ? 90 : 0, 0); 359 + if (fail) { 360 + for (i = nr_files; i < nr_files + 90; i++) 361 + files[i] = -2; 362 + } 359 363 ret = io_uring_register_files(ring, files, 100); 360 364 if (ret) { 361 365 if (fail) {

+1 -1

vendor/liburing/test/file-verify.c

··· 28 28 #define MAX_VECS 16 29 29 30 30 /* 31 - * Can be anything, let's just do something for a bit of parallellism 31 + * Can be anything, let's just do something for a bit of parallelism 32 32 */ 33 33 #define READ_BATCH 16 34 34

+101

vendor/liburing/test/fixed-buf-merge.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Test fixed buffer merging/skipping 4 + * 5 + * Taken from: https://github.com/axboe/liburing/issues/994 6 + * 7 + */ 8 + #include <stdio.h> 9 + #include <string.h> 10 + #include <fcntl.h> 11 + #include <stdlib.h> 12 + #include <unistd.h> 13 + 14 + #include "liburing.h" 15 + #include "helpers.h" 16 + 17 + int main(int argc, char *argv[]) 18 + { 19 + int ret, i, fd, initial_offset = 4096, num_requests = 3; 20 + struct io_uring ring; 21 + struct io_uring_sqe *sqe; 22 + struct io_uring_cqe *cqe; 23 + struct iovec iov; 24 + char *buffer, *to_free; 25 + unsigned head; 26 + char filename[64]; 27 + 28 + ret = io_uring_queue_init(4, &ring, 0); 29 + if (ret) { 30 + fprintf(stderr, "queue_init: %d\n", ret); 31 + return T_EXIT_FAIL; 32 + } 33 + 34 + sprintf(filename, ".fixed-buf-%d", getpid()); 35 + t_create_file(filename, 4 * 4096); 36 + 37 + fd = open(filename, O_RDONLY | O_DIRECT, 0644); 38 + if (fd < 0) { 39 + if (errno == EINVAL) { 40 + unlink(filename); 41 + return T_EXIT_SKIP; 42 + } 43 + perror("open"); 44 + goto err_unlink; 45 + } 46 + 47 + to_free = buffer = aligned_alloc(4096, 128 * 4096); 48 + if (!buffer) { 49 + perror("aligned_alloc"); 50 + goto err_unlink; 51 + } 52 + 53 + /* Register buffer */ 54 + iov.iov_base = buffer; 55 + iov.iov_len = 128 * 4096; 56 + 57 + ret = io_uring_register_buffers(&ring, &iov, 1); 58 + if (ret) { 59 + fprintf(stderr, "buf register: %d\n", ret); 60 + goto err_unlink; 61 + } 62 + 63 + /* Prepare read requests */ 64 + buffer += initial_offset; 65 + for (i = 0; i < num_requests; i++) { 66 + sqe = io_uring_get_sqe(&ring); 67 + io_uring_prep_read_fixed(sqe, fd, buffer, 4096, 4096 * i, 0); 68 + buffer += 4096; 69 + } 70 + 71 + /* Submit requests and reap completions */ 72 + ret = io_uring_submit_and_wait(&ring, num_requests); 73 + if (ret != num_requests) { 74 + fprintf(stderr, "Submit and wait: %d\n", ret); 75 + goto err_unlink; 76 + } 77 + 78 + i = 0; 79 + 
io_uring_for_each_cqe(&ring, head, cqe) { 80 + if (cqe->res != 4096) { 81 + fprintf(stderr, "cqe: %d\n", cqe->res); 82 + goto err_unlink; 83 + } 84 + i++; 85 + } 86 + 87 + if (i != num_requests) { 88 + fprintf(stderr, "Got %d completions\n", i); 89 + goto err_unlink; 90 + } 91 + 92 + io_uring_cq_advance(&ring, i); 93 + io_uring_queue_exit(&ring); 94 + close(fd); 95 + free(to_free); 96 + unlink(filename); 97 + return T_EXIT_PASS; 98 + err_unlink: 99 + unlink(filename); 100 + return T_EXIT_FAIL; 101 + }

+411

vendor/liburing/test/fixed-hugepage.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Test fixed buffers consisting of hugepages. 4 + */ 5 + #include <stdio.h> 6 + #include <string.h> 7 + #include <fcntl.h> 8 + #include <stdlib.h> 9 + #include <sys/mman.h> 10 + #include <linux/mman.h> 11 + 12 + #include "liburing.h" 13 + #include "helpers.h" 14 + 15 + /* 16 + * Before testing 17 + * echo (>=4) > /proc/sys/vm/nr_hugepages 18 + * echo madvise > /sys/kernel/mm/transparent_hugepage/enabled 19 + * echo always > /sys/kernel/mm/transparent_hugepage/hugepages-16kB/enabled 20 + * 21 + * Not 100% guaranteed to get THP-backed memory, but in general it does. 22 + */ 23 + #define MTHP_16KB (16UL * 1024) 24 + #define HUGEPAGE_SIZE (2UL * 1024 * 1024) 25 + #define NR_BUFS 1 26 + #define IN_FD "/dev/urandom" 27 + #define OUT_FD "/dev/zero" 28 + 29 + static int open_files(char *fname_in, int *fd_in, int *fd_out) 30 + { 31 + *fd_in = open(fname_in, O_RDONLY, 0644); 32 + if (*fd_in < 0) { 33 + printf("open %s failed\n", fname_in); 34 + return -1; 35 + } 36 + 37 + *fd_out = open(OUT_FD, O_RDWR, 0644); 38 + if (*fd_out < 0) { 39 + printf("open %s failed\n", OUT_FD); 40 + return -1; 41 + } 42 + 43 + return 0; 44 + } 45 + 46 + static void unmap(struct iovec *iov, int nr_bufs, size_t offset) 47 + { 48 + int i; 49 + 50 + for (i = 0; i < nr_bufs; i++) 51 + munmap(iov[i].iov_base - offset, iov[i].iov_len + offset); 52 + } 53 + 54 + static int mmap_hugebufs(struct iovec *iov, int nr_bufs, size_t buf_size, size_t offset) 55 + { 56 + int i; 57 + 58 + for (i = 0; i < nr_bufs; i++) { 59 + void *base = NULL; 60 + 61 + base = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, 62 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); 63 + if (base == MAP_FAILED) { 64 + printf("Unable to map hugetlb page. 
Try increasing the " 65 + "value in /proc/sys/vm/nr_hugepages\n"); 66 + unmap(iov, i, offset); 67 + return -1; 68 + } 69 + 70 + memset(base, 0, buf_size); 71 + iov[i].iov_base = base + offset; 72 + iov[i].iov_len = buf_size - offset; 73 + } 74 + 75 + return 0; 76 + } 77 + 78 + /* map a hugepage and smaller page to a contiguous memory */ 79 + static int mmap_mixture(struct iovec *iov, int nr_bufs, size_t buf_size, bool huge_on_left) 80 + { 81 + int i; 82 + void *small_base = NULL, *huge_base = NULL, *start = NULL, 83 + *huge_start = NULL, *small_start = NULL; 84 + size_t small_size = buf_size - HUGEPAGE_SIZE; 85 + size_t seg_size = ((buf_size / HUGEPAGE_SIZE) + 1) * HUGEPAGE_SIZE; 86 + 87 + start = mmap(NULL, seg_size * nr_bufs, PROT_NONE, 88 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); 89 + if (start == MAP_FAILED) { 90 + printf("Unable to preserve the page mixture memory. " 91 + "Try increasing the RLIMIT_MEMLOCK resource limit\n"); 92 + return -1; 93 + } 94 + 95 + for (i = 0; i < nr_bufs; i++) { 96 + if (huge_on_left) { 97 + huge_start = start; 98 + small_start = start + HUGEPAGE_SIZE; 99 + } else { 100 + huge_start = start + HUGEPAGE_SIZE; 101 + small_start = start + HUGEPAGE_SIZE - small_size; 102 + } 103 + 104 + huge_base = mmap(huge_start, HUGEPAGE_SIZE, PROT_READ | PROT_WRITE, 105 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED, -1, 0); 106 + if (huge_base == MAP_FAILED) { 107 + printf("Unable to map hugetlb page in the page mixture. " 108 + "Try increasing the value in /proc/sys/vm/nr_hugepages\n"); 109 + unmap(iov, nr_bufs, 0); 110 + return -1; 111 + } 112 + 113 + small_base = mmap(small_start, small_size, PROT_READ | PROT_WRITE, 114 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); 115 + if (small_base == MAP_FAILED) { 116 + printf("Unable to map small page in the page mixture. 
" 117 + "Try increasing the RLIMIT_MEMLOCK resource limit\n"); 118 + unmap(iov, nr_bufs, 0); 119 + return -1; 120 + } 121 + 122 + if (huge_on_left) { 123 + iov[i].iov_base = huge_base; 124 + memset(huge_base, 0, buf_size); 125 + } 126 + else { 127 + iov[i].iov_base = small_base; 128 + memset(small_base, 0, buf_size); 129 + } 130 + iov[i].iov_len = buf_size; 131 + start += seg_size; 132 + } 133 + 134 + return 0; 135 + } 136 + 137 + static void free_bufs(struct iovec *iov, int nr_bufs, size_t offset) 138 + { 139 + int i; 140 + 141 + for (i = 0; i < nr_bufs; i++) 142 + free(iov[i].iov_base - offset); 143 + } 144 + 145 + static int get_mthp_bufs(struct iovec *iov, int nr_bufs, size_t buf_size, 146 + size_t alignment, size_t offset) 147 + { 148 + int i; 149 + 150 + for (i = 0; i < nr_bufs; i++) { 151 + void *base = NULL; 152 + 153 + if (posix_memalign(&base, alignment, buf_size)) { 154 + printf("Unable to allocate mthp pages. " 155 + "Try increasing the RLIMIT_MEMLOCK resource limit\n"); 156 + free_bufs(iov, i, offset); 157 + return -1; 158 + } 159 + 160 + memset(base, 0, buf_size); 161 + iov[i].iov_base = base + offset; 162 + iov[i].iov_len = buf_size - offset; 163 + } 164 + 165 + return 0; 166 + } 167 + 168 + static int do_read(struct io_uring *ring, int fd, struct iovec *iov, int nr_bufs) 169 + { 170 + struct io_uring_sqe *sqe; 171 + struct io_uring_cqe *cqe; 172 + int i, ret; 173 + 174 + for (i = 0; i < nr_bufs; i++) { 175 + sqe = io_uring_get_sqe(ring); 176 + if (!sqe) { 177 + fprintf(stderr, "Could not get SQE.\n"); 178 + return -1; 179 + } 180 + 181 + io_uring_prep_read_fixed(sqe, fd, iov[i].iov_base, iov[i].iov_len, 0, i); 182 + io_uring_submit(ring); 183 + 184 + ret = io_uring_wait_cqe(ring, &cqe); 185 + if (ret < 0) { 186 + fprintf(stderr, "Error waiting for completion: %s\n", strerror(-ret)); 187 + return -1; 188 + } 189 + 190 + if (cqe->res < 0) { 191 + fprintf(stderr, "Error in async read operation: %s\n", strerror(-cqe->res)); 192 + return -1; 193 + } 194 
+ if (cqe->res != iov[i].iov_len) { 195 + fprintf(stderr, "cqe res: %d, expected: %lu\n", cqe->res, (unsigned long) iov[i].iov_len); 196 + return -1; 197 + } 198 + 199 + io_uring_cqe_seen(ring, cqe); 200 + } 201 + 202 + return 0; 203 + } 204 + 205 + static int do_write(struct io_uring *ring, int fd, struct iovec *iov, int nr_bufs) 206 + { 207 + struct io_uring_sqe *sqe; 208 + struct io_uring_cqe *cqe; 209 + int i, ret; 210 + 211 + for (i = 0; i < nr_bufs; i++) { 212 + sqe = io_uring_get_sqe(ring); 213 + if (!sqe) { 214 + fprintf(stderr, "Could not get SQE.\n"); 215 + return -1; 216 + } 217 + 218 + io_uring_prep_write_fixed(sqe, fd, iov[i].iov_base, iov[i].iov_len, 0, i); 219 + io_uring_submit(ring); 220 + 221 + ret = io_uring_wait_cqe(ring, &cqe); 222 + if (ret < 0) { 223 + fprintf(stderr, "Error waiting for completion: %s\n", strerror(-ret)); 224 + return -1; 225 + } 226 + 227 + if (cqe->res < 0) { 228 + fprintf(stderr, "Error in async write operation: %s\n", strerror(-cqe->res)); 229 + return -1; 230 + } 231 + if (cqe->res != iov[i].iov_len) { 232 + fprintf(stderr, "cqe res: %d, expected: %lu\n", cqe->res, (unsigned long) iov[i].iov_len); 233 + return -1; 234 + } 235 + 236 + io_uring_cqe_seen(ring, cqe); 237 + } 238 + 239 + return 0; 240 + } 241 + 242 + static int register_submit(struct io_uring *ring, struct iovec *iov, 243 + int nr_bufs, int fd_in, int fd_out) 244 + { 245 + int ret; 246 + 247 + ret = io_uring_register_buffers(ring, iov, nr_bufs); 248 + if (ret) { 249 + fprintf(stderr, "Error registering buffers: %s\n", strerror(-ret)); 250 + return ret; 251 + } 252 + 253 + ret = do_read(ring, fd_in, iov, nr_bufs); 254 + if (ret) { 255 + fprintf(stderr, "Read test failed\n"); 256 + return ret; 257 + } 258 + 259 + ret = do_write(ring, fd_out, iov, nr_bufs); 260 + if (ret) { 261 + fprintf(stderr, "Write test failed\n"); 262 + return ret; 263 + } 264 + 265 + ret = io_uring_unregister_buffers(ring); 266 + if (ret) { 267 + fprintf(stderr, "Error unregistering buffers 
for one hugepage test: %s", strerror(-ret)); 268 + return ret; 269 + } 270 + 271 + return 0; 272 + } 273 + 274 + static int test_one_hugepage(struct io_uring *ring, int fd_in, int fd_out) 275 + { 276 + struct iovec iov[NR_BUFS]; 277 + size_t buf_size = HUGEPAGE_SIZE; 278 + int ret; 279 + 280 + if (mmap_hugebufs(iov, NR_BUFS, buf_size, 0)) 281 + return T_EXIT_SKIP; 282 + 283 + ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out); 284 + unmap(iov, NR_BUFS, 0); 285 + return ret ? T_EXIT_FAIL : T_EXIT_PASS; 286 + } 287 + 288 + static int test_multi_hugepages(struct io_uring *ring, int fd_in, int fd_out) 289 + { 290 + struct iovec iov[NR_BUFS]; 291 + size_t buf_size = 4 * HUGEPAGE_SIZE; 292 + int ret; 293 + 294 + if (mmap_hugebufs(iov, NR_BUFS, buf_size, 0)) 295 + return T_EXIT_SKIP; 296 + 297 + ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out); 298 + unmap(iov, NR_BUFS, 0); 299 + return ret ? T_EXIT_FAIL : T_EXIT_PASS; 300 + } 301 + 302 + static int test_unaligned_hugepage(struct io_uring *ring, int fd_in, int fd_out) 303 + { 304 + struct iovec iov[NR_BUFS]; 305 + size_t buf_size = 3 * HUGEPAGE_SIZE; 306 + size_t offset = 0x1234; 307 + int ret; 308 + 309 + if (mmap_hugebufs(iov, NR_BUFS, buf_size, offset)) 310 + return T_EXIT_SKIP; 311 + 312 + ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out); 313 + unmap(iov, NR_BUFS, offset); 314 + return ret ? T_EXIT_FAIL : T_EXIT_PASS; 315 + } 316 + 317 + static int test_multi_unaligned_mthps(struct io_uring *ring, int fd_in, int fd_out) 318 + { 319 + struct iovec iov[NR_BUFS]; 320 + int ret; 321 + size_t buf_size = 3 * MTHP_16KB; 322 + size_t offset = 0x1234; 323 + 324 + if (get_mthp_bufs(iov, NR_BUFS, buf_size, MTHP_16KB, offset)) 325 + return T_EXIT_SKIP; 326 + 327 + ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out); 328 + free_bufs(iov, NR_BUFS, offset); 329 + return ret ? 
T_EXIT_FAIL : T_EXIT_PASS; 330 + } 331 + 332 + /* Should not coalesce */ 333 + static int test_page_mixture(struct io_uring *ring, int fd_in, int fd_out, int huge_on_left) 334 + { 335 + struct iovec iov[NR_BUFS]; 336 + size_t buf_size = HUGEPAGE_SIZE + MTHP_16KB; 337 + int ret; 338 + 339 + if (mmap_mixture(iov, NR_BUFS, buf_size, huge_on_left)) 340 + return T_EXIT_SKIP; 341 + 342 + ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out); 343 + unmap(iov, NR_BUFS, 0); 344 + return ret ? T_EXIT_FAIL : T_EXIT_PASS; 345 + } 346 + 347 + int main(int argc, char *argv[]) 348 + { 349 + struct io_uring ring; 350 + int ret, fd_in, fd_out; 351 + char *fname_in; 352 + 353 + if (argc > 1) 354 + fname_in = argv[1]; 355 + else 356 + fname_in = IN_FD; 357 + 358 + if (open_files(fname_in, &fd_in, &fd_out)) 359 + return T_EXIT_SKIP; 360 + 361 + ret = t_create_ring(8, &ring, 0); 362 + if (ret == T_SETUP_SKIP) 363 + return T_EXIT_SKIP; 364 + else if (ret < 0) 365 + return T_EXIT_FAIL; 366 + 367 + ret = test_one_hugepage(&ring, fd_in, fd_out); 368 + if (ret != T_EXIT_PASS) { 369 + if (ret != T_EXIT_SKIP) 370 + fprintf(stderr, "Test one hugepage failed.\n"); 371 + return ret; 372 + } 373 + 374 + ret = test_multi_hugepages(&ring, fd_in, fd_out); 375 + if (ret != T_EXIT_PASS) { 376 + if (ret != T_EXIT_SKIP) 377 + fprintf(stderr, "Test multi hugepages failed.\n"); 378 + return ret; 379 + } 380 + 381 + ret = test_unaligned_hugepage(&ring, fd_in, fd_out); 382 + if (ret != T_EXIT_PASS) { 383 + if (ret != T_EXIT_SKIP) 384 + fprintf(stderr, "Test unaligned hugepage failed.\n"); 385 + return ret; 386 + } 387 + 388 + ret = test_multi_unaligned_mthps(&ring, fd_in, fd_out); 389 + if (ret != T_EXIT_PASS) { 390 + if (ret != T_EXIT_SKIP) 391 + fprintf(stderr, "Test unaligned multi-size'd THPs failed.\n"); 392 + return ret; 393 + } 394 + 395 + ret = test_page_mixture(&ring, fd_in, fd_out, true); 396 + if (ret != T_EXIT_PASS) { 397 + if (ret != T_EXIT_SKIP) 398 + fprintf(stderr, "Test huge small page 
mixture (start with huge) failed.\n"); 399 + return ret; 400 + } 401 + 402 + ret = test_page_mixture(&ring, fd_in, fd_out, false); 403 + if (ret != T_EXIT_PASS) { 404 + if (ret != T_EXIT_SKIP) 405 + fprintf(stderr, "Test huge small page mixture (start with small) failed.\n"); 406 + return ret; 407 + } 408 + 409 + io_uring_queue_exit(&ring); 410 + return T_EXIT_PASS; 411 + }

+1 -1

vendor/liburing/test/fpos.c

··· 98 98 if (res == 0) { 99 99 done = true; 100 100 } else if (res == -ECANCELED) { 101 - /* cancelled, probably ok */ 101 + /* canceled, probably ok */ 102 102 } else if (res < 0 || res > blocksize) { 103 103 fprintf(stderr, "bad read: %d\n", res); 104 104 return -1;

+571

vendor/liburing/test/futex.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: exercise futex wait/wake/waitv 4 + * 5 + */ 6 + #include <stdio.h> 7 + #include <unistd.h> 8 + #include <stdlib.h> 9 + #include <pthread.h> 10 + #include <errno.h> 11 + #include <linux/futex.h> 12 + 13 + #include "liburing.h" 14 + #include "helpers.h" 15 + 16 + #define LOOPS 500 17 + #define NFUTEX 8 18 + 19 + #ifndef FUTEX2_SIZE_U8 20 + #define FUTEX2_SIZE_U8 0x00 21 + #define FUTEX2_SIZE_U16 0x01 22 + #define FUTEX2_SIZE_U32 0x02 23 + #define FUTEX2_SIZE_U64 0x03 24 + #define FUTEX2_NUMA 0x04 25 + /* 0x08 */ 26 + /* 0x10 */ 27 + /* 0x20 */ 28 + /* 0x40 */ 29 + #define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG 30 + 31 + #define FUTEX2_SIZE_MASK 0x03 32 + #endif 33 + 34 + static int no_futex; 35 + 36 + static void *fwake(void *data) 37 + { 38 + unsigned int *futex = data; 39 + struct io_uring_sqe *sqe; 40 + struct io_uring_cqe *cqe; 41 + struct io_uring ring; 42 + int ret; 43 + 44 + ret = io_uring_queue_init(1, &ring, 0); 45 + if (ret) { 46 + fprintf(stderr, "queue init: %d\n", ret); 47 + return NULL; 48 + } 49 + 50 + *futex = 1; 51 + sqe = io_uring_get_sqe(&ring); 52 + io_uring_prep_futex_wake(sqe, futex, 1, FUTEX_BITSET_MATCH_ANY, 53 + FUTEX2_SIZE_U32, 0); 54 + sqe->user_data = 3; 55 + 56 + io_uring_submit(&ring); 57 + 58 + ret = io_uring_wait_cqe(&ring, &cqe); 59 + if (ret) { 60 + fprintf(stderr, "wait: %d\n", ret); 61 + return NULL; 62 + } 63 + io_uring_cqe_seen(&ring, cqe); 64 + io_uring_queue_exit(&ring); 65 + return NULL; 66 + } 67 + 68 + static int __test(struct io_uring *ring, int vectored, int async, 69 + int async_cancel) 70 + { 71 + struct io_uring_sqe *sqe; 72 + struct io_uring_cqe *cqe; 73 + struct futex_waitv fw[NFUTEX]; 74 + unsigned int *futex; 75 + pthread_t threads[NFUTEX]; 76 + void *tret; 77 + int ret, i, nfutex; 78 + 79 + nfutex = NFUTEX; 80 + if (!vectored) 81 + nfutex = 1; 82 + 83 + futex = calloc(nfutex, sizeof(*futex)); 84 + for (i = 0; i < nfutex; i++) { 85 + fw[i].val = 0; 86 + 
fw[i].uaddr = (unsigned long) &futex[i]; 87 + fw[i].flags = FUTEX2_SIZE_U32; 88 + fw[i].__reserved = 0; 89 + } 90 + 91 + sqe = io_uring_get_sqe(ring); 92 + if (vectored) 93 + io_uring_prep_futex_waitv(sqe, fw, nfutex, 0); 94 + else 95 + io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 96 + FUTEX2_SIZE_U32, 0); 97 + if (async) 98 + sqe->flags |= IOSQE_ASYNC; 99 + sqe->user_data = 1; 100 + 101 + io_uring_submit(ring); 102 + 103 + for (i = 0; i < nfutex; i++) 104 + pthread_create(&threads[i], NULL, fwake, &futex[i]); 105 + 106 + sqe = io_uring_get_sqe(ring); 107 + io_uring_prep_cancel64(sqe, 1, 0); 108 + if (async_cancel) 109 + sqe->flags |= IOSQE_ASYNC; 110 + sqe->user_data = 2; 111 + 112 + io_uring_submit(ring); 113 + 114 + for (i = 0; i < 2; i++) { 115 + ret = io_uring_wait_cqe(ring, &cqe); 116 + if (ret) { 117 + fprintf(stderr, "parent wait %d\n", ret); 118 + return 1; 119 + } 120 + 121 + if (cqe->res == -EINVAL || cqe->res == -EOPNOTSUPP) { 122 + no_futex = 1; 123 + return 0; 124 + } 125 + io_uring_cqe_seen(ring, cqe); 126 + } 127 + 128 + ret = io_uring_peek_cqe(ring, &cqe); 129 + if (!ret) { 130 + fprintf(stderr, "peek found cqe!\n"); 131 + return 1; 132 + } 133 + 134 + for (i = 0; i < nfutex; i++) 135 + pthread_join(threads[i], &tret); 136 + 137 + return 0; 138 + } 139 + 140 + static int test(int flags, int vectored) 141 + { 142 + struct io_uring ring; 143 + int ret, i; 144 + 145 + ret = io_uring_queue_init(8, &ring, flags); 146 + if (ret) 147 + return ret; 148 + 149 + for (i = 0; i < LOOPS; i++) { 150 + int async_cancel = (!i % 2); 151 + int async_wait = !(i % 3); 152 + ret = __test(&ring, vectored, async_wait, async_cancel); 153 + if (ret) { 154 + fprintf(stderr, "flags=%x, failed=%d\n", flags, i); 155 + break; 156 + } 157 + if (no_futex) 158 + break; 159 + } 160 + 161 + io_uring_queue_exit(&ring); 162 + return ret; 163 + } 164 + 165 + static int test_order(int vectored, int async) 166 + { 167 + struct io_uring_sqe *sqe; 168 + struct 
io_uring_cqe *cqe; 169 + struct futex_waitv fw; 170 + struct io_uring ring; 171 + unsigned int *futex; 172 + int ret, i; 173 + 174 + ret = io_uring_queue_init(8, &ring, 0); 175 + if (ret) 176 + return ret; 177 + 178 + futex = malloc(sizeof(*futex)); 179 + *futex = 0; 180 + 181 + fw.val = 0; 182 + fw.uaddr = (unsigned long) futex; 183 + fw.flags = FUTEX2_SIZE_U32; 184 + fw.__reserved = 0; 185 + 186 + /* 187 + * Submit two futex waits 188 + */ 189 + sqe = io_uring_get_sqe(&ring); 190 + if (!vectored) 191 + io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 192 + FUTEX2_SIZE_U32, 0); 193 + else 194 + io_uring_prep_futex_waitv(sqe, &fw, 1, 0); 195 + sqe->user_data = 1; 196 + 197 + sqe = io_uring_get_sqe(&ring); 198 + if (!vectored) 199 + io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 200 + FUTEX2_SIZE_U32, 0); 201 + else 202 + io_uring_prep_futex_waitv(sqe, &fw, 1, 0); 203 + sqe->user_data = 2; 204 + 205 + io_uring_submit(&ring); 206 + 207 + /* 208 + * Now submit wake for just one futex 209 + */ 210 + *futex = 1; 211 + sqe = io_uring_get_sqe(&ring); 212 + io_uring_prep_futex_wake(sqe, futex, 1, FUTEX_BITSET_MATCH_ANY, 213 + FUTEX2_SIZE_U32, 0); 214 + sqe->user_data = 100; 215 + if (async) 216 + sqe->flags |= IOSQE_ASYNC; 217 + 218 + io_uring_submit(&ring); 219 + 220 + /* 221 + * We expect to find completions for the first futex wait, and 222 + * the futex wake. We should not see the last futex wait. 
223 + */ 224 + for (i = 0; i < 2; i++) { 225 + ret = io_uring_wait_cqe(&ring, &cqe); 226 + if (ret) { 227 + fprintf(stderr, "wait %d\n", ret); 228 + return 1; 229 + } 230 + if (cqe->user_data == 1 || cqe->user_data == 100) { 231 + io_uring_cqe_seen(&ring, cqe); 232 + continue; 233 + } 234 + fprintf(stderr, "unexpected cqe %lu, res %d\n", (unsigned long) cqe->user_data, cqe->res); 235 + return 1; 236 + } 237 + 238 + ret = io_uring_peek_cqe(&ring, &cqe); 239 + if (ret != -EAGAIN) { 240 + fprintf(stderr, "Unexpected cqe available: %d\n", cqe->res); 241 + return 1; 242 + } 243 + 244 + io_uring_queue_exit(&ring); 245 + return 0; 246 + } 247 + 248 + static int test_multi_wake(int vectored) 249 + { 250 + struct io_uring_sqe *sqe; 251 + struct io_uring_cqe *cqe; 252 + struct futex_waitv fw; 253 + struct io_uring ring; 254 + unsigned int *futex; 255 + int ret, i; 256 + 257 + ret = io_uring_queue_init(8, &ring, 0); 258 + if (ret) 259 + return ret; 260 + 261 + futex = malloc(sizeof(*futex)); 262 + *futex = 0; 263 + 264 + fw.val = 0; 265 + fw.uaddr = (unsigned long) futex; 266 + fw.flags = FUTEX2_SIZE_U32; 267 + fw.__reserved = 0; 268 + 269 + /* 270 + * Submit two futex waits 271 + */ 272 + sqe = io_uring_get_sqe(&ring); 273 + if (!vectored) 274 + io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 275 + FUTEX2_SIZE_U32, 0); 276 + else 277 + io_uring_prep_futex_waitv(sqe, &fw, 1, 0); 278 + sqe->user_data = 1; 279 + 280 + sqe = io_uring_get_sqe(&ring); 281 + if (!vectored) 282 + io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 283 + FUTEX2_SIZE_U32, 0); 284 + else 285 + io_uring_prep_futex_waitv(sqe, &fw, 1, 0); 286 + sqe->user_data = 2; 287 + 288 + io_uring_submit(&ring); 289 + 290 + /* 291 + * Now submit wake for both futexes 292 + */ 293 + *futex = 1; 294 + sqe = io_uring_get_sqe(&ring); 295 + io_uring_prep_futex_wake(sqe, futex, 2, FUTEX_BITSET_MATCH_ANY, 296 + FUTEX2_SIZE_U32, 0); 297 + sqe->user_data = 100; 298 + 299 + io_uring_submit(&ring); 
300 + 301 + /* 302 + * We expect to find completions for the both futex waits, and 303 + * the futex wake. 304 + */ 305 + for (i = 0; i < 3; i++) { 306 + ret = io_uring_wait_cqe(&ring, &cqe); 307 + if (ret) { 308 + fprintf(stderr, "wait %d\n", ret); 309 + return 1; 310 + } 311 + if (cqe->res < 0) { 312 + fprintf(stderr, "cqe error %d\n", cqe->res); 313 + return 1; 314 + } 315 + io_uring_cqe_seen(&ring, cqe); 316 + } 317 + 318 + ret = io_uring_peek_cqe(&ring, &cqe); 319 + if (!ret) { 320 + fprintf(stderr, "peek found cqe!\n"); 321 + return 1; 322 + } 323 + 324 + io_uring_queue_exit(&ring); 325 + return 0; 326 + } 327 + 328 + /* 329 + * Test that waking 0 futexes returns 0 330 + */ 331 + static int test_wake_zero(void) 332 + { 333 + struct io_uring_sqe *sqe; 334 + struct io_uring_cqe *cqe; 335 + struct io_uring ring; 336 + unsigned int *futex; 337 + int ret; 338 + 339 + ret = io_uring_queue_init(8, &ring, 0); 340 + if (ret) 341 + return ret; 342 + 343 + futex = malloc(sizeof(*futex)); 344 + *futex = 0; 345 + 346 + sqe = io_uring_get_sqe(&ring); 347 + sqe->user_data = 1; 348 + io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 349 + FUTEX2_SIZE_U32, 0); 350 + 351 + io_uring_submit(&ring); 352 + 353 + sqe = io_uring_get_sqe(&ring); 354 + sqe->user_data = 2; 355 + io_uring_prep_futex_wake(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 356 + FUTEX2_SIZE_U32, 0); 357 + 358 + io_uring_submit(&ring); 359 + 360 + ret = io_uring_wait_cqe(&ring, &cqe); 361 + 362 + /* 363 + * Should get zero res and it should be the wake 364 + */ 365 + if (cqe->res || cqe->user_data != 2) { 366 + fprintf(stderr, "cqe res %d, data %ld\n", cqe->res, (long) cqe->user_data); 367 + return 1; 368 + } 369 + io_uring_cqe_seen(&ring, cqe); 370 + 371 + /* 372 + * Should not have the wait complete 373 + */ 374 + ret = io_uring_peek_cqe(&ring, &cqe); 375 + if (!ret) { 376 + fprintf(stderr, "peek found cqe!\n"); 377 + return 1; 378 + } 379 + 380 + io_uring_queue_exit(&ring); 381 + return 0; 382 + } 383 
+ 384 + /* 385 + * Test invalid wait/wake/waitv flags 386 + */ 387 + static int test_invalid(void) 388 + { 389 + struct io_uring_sqe *sqe; 390 + struct io_uring_cqe *cqe; 391 + struct futex_waitv fw; 392 + struct io_uring ring; 393 + unsigned int *futex; 394 + int ret; 395 + 396 + ret = io_uring_queue_init(8, &ring, 0); 397 + if (ret) 398 + return ret; 399 + 400 + futex = malloc(sizeof(*futex)); 401 + *futex = 0; 402 + 403 + sqe = io_uring_get_sqe(&ring); 404 + sqe->user_data = 1; 405 + io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 0x1000, 406 + 0); 407 + 408 + io_uring_submit(&ring); 409 + 410 + ret = io_uring_wait_cqe(&ring, &cqe); 411 + 412 + /* 413 + * Should fail with -EINVAL because of the invalid flags 414 + */ 415 + if (cqe->res != -EINVAL) { 416 + fprintf(stderr, "wait cqe res %d\n", cqe->res); 417 + return 1; 418 + } 419 + io_uring_cqe_seen(&ring, cqe); 420 + 421 + sqe = io_uring_get_sqe(&ring); 422 + sqe->user_data = 1; 423 + io_uring_prep_futex_wake(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 0x1000, 424 + 0); 425 + 426 + io_uring_submit(&ring); 427 + 428 + ret = io_uring_wait_cqe(&ring, &cqe); 429 + 430 + /* 431 + * Should fail with -EINVAL because of the invalid flags 432 + */ 433 + if (cqe->res != -EINVAL) { 434 + fprintf(stderr, "wake cqe res %d\n", cqe->res); 435 + return 1; 436 + } 437 + io_uring_cqe_seen(&ring, cqe); 438 + 439 + fw.val = 0; 440 + fw.uaddr = (unsigned long) futex; 441 + fw.flags = FUTEX2_SIZE_U32 | 0x1000; 442 + fw.__reserved = 0; 443 + 444 + sqe = io_uring_get_sqe(&ring); 445 + sqe->user_data = 1; 446 + io_uring_prep_futex_waitv(sqe, &fw, 1, 0); 447 + 448 + io_uring_submit(&ring); 449 + 450 + ret = io_uring_wait_cqe(&ring, &cqe); 451 + 452 + /* 453 + * Should fail with -EINVAL because of the invalid flags 454 + */ 455 + if (cqe->res != -EINVAL) { 456 + fprintf(stderr, "waitv cqe res %d\n", cqe->res); 457 + return 1; 458 + } 459 + io_uring_cqe_seen(&ring, cqe); 460 + 461 + io_uring_queue_exit(&ring); 462 + return 0; 463 + } 464 + 465 + int 
main(int argc, char *argv[]) 466 + { 467 + int ret; 468 + 469 + if (argc > 1) 470 + return T_EXIT_SKIP; 471 + 472 + ret = test(0, 0); 473 + if (ret) { 474 + fprintf(stderr, "test 0 0 failed\n"); 475 + return T_EXIT_FAIL; 476 + } 477 + if (no_futex) 478 + return T_EXIT_SKIP; 479 + 480 + ret = test(0, 1); 481 + if (ret) { 482 + fprintf(stderr, "test 0 1 failed\n"); 483 + return T_EXIT_FAIL; 484 + } 485 + 486 + ret = test_wake_zero(); 487 + if (ret) { 488 + fprintf(stderr, "wake 0 failed\n"); 489 + return T_EXIT_FAIL; 490 + } 491 + 492 + ret = test_invalid(); 493 + if (ret) { 494 + fprintf(stderr, "test invalid failed\n"); 495 + return T_EXIT_FAIL; 496 + } 497 + 498 + ret = test(IORING_SETUP_SQPOLL, 0); 499 + if (ret) { 500 + fprintf(stderr, "test sqpoll 0 failed\n"); 501 + return T_EXIT_FAIL; 502 + } 503 + 504 + ret = test(IORING_SETUP_SQPOLL, 1); 505 + if (ret) { 506 + fprintf(stderr, "test sqpoll 1 failed\n"); 507 + return T_EXIT_FAIL; 508 + } 509 + 510 + ret = test(IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, 0); 511 + if (ret) { 512 + fprintf(stderr, "test single coop 0 failed\n"); 513 + return T_EXIT_FAIL; 514 + } 515 + 516 + ret = test(IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, 1); 517 + if (ret) { 518 + fprintf(stderr, "test single coop 1 failed\n"); 519 + return T_EXIT_FAIL; 520 + } 521 + 522 + ret = test(IORING_SETUP_COOP_TASKRUN, 0); 523 + if (ret) { 524 + fprintf(stderr, "test taskrun 0 failed\n"); 525 + return T_EXIT_FAIL; 526 + } 527 + 528 + ret = test(IORING_SETUP_COOP_TASKRUN, 1); 529 + if (ret) { 530 + fprintf(stderr, "test taskrun 1 failed\n"); 531 + return T_EXIT_FAIL; 532 + } 533 + 534 + ret = test_order(0, 0); 535 + if (ret) { 536 + fprintf(stderr, "test_order 0 0 failed\n"); 537 + return T_EXIT_FAIL; 538 + } 539 + 540 + ret = test_order(1, 0); 541 + if (ret) { 542 + fprintf(stderr, "test_order 1 0 failed\n"); 543 + return T_EXIT_FAIL; 544 + } 545 + 546 + ret = test_order(0, 1); 547 + if (ret) { 548 + fprintf(stderr, 
"test_order 0 1 failed\n"); 549 + return T_EXIT_FAIL; 550 + } 551 + 552 + ret = test_order(1, 1); 553 + if (ret) { 554 + fprintf(stderr, "test_order 1 1 failed\n"); 555 + return T_EXIT_FAIL; 556 + } 557 + 558 + ret = test_multi_wake(0); 559 + if (ret) { 560 + fprintf(stderr, "multi_wake 0 failed\n"); 561 + return T_EXIT_FAIL; 562 + } 563 + 564 + ret = test_multi_wake(1); 565 + if (ret) { 566 + fprintf(stderr, "multi_wake 1 failed\n"); 567 + return T_EXIT_FAIL; 568 + } 569 + 570 + return T_EXIT_PASS; 571 + }

+69 -39

vendor/liburing/test/hardlink.c

··· 12 12 #include "liburing.h" 13 13 #include "helpers.h" 14 14 15 - 16 - static int do_linkat(struct io_uring *ring, const char *oldname, const char *newname) 15 + static int do_linkat(struct io_uring *ring, int olddirfd, const char *oldname, 16 + const char *newname, int flags) 17 17 { 18 - int ret; 19 18 struct io_uring_sqe *sqe; 20 19 struct io_uring_cqe *cqe; 20 + int ret; 21 21 22 22 sqe = io_uring_get_sqe(ring); 23 23 if (!sqe) { 24 24 fprintf(stderr, "sqe get failed\n"); 25 - goto err; 25 + return 1; 26 26 } 27 - io_uring_prep_linkat(sqe, AT_FDCWD, oldname, AT_FDCWD, newname, 0); 27 + io_uring_prep_linkat(sqe, olddirfd, oldname, AT_FDCWD, newname, flags); 28 28 29 29 ret = io_uring_submit(ring); 30 30 if (ret != 1) { 31 31 fprintf(stderr, "submit failed: %d\n", ret); 32 - goto err; 32 + return 1; 33 33 } 34 34 35 35 ret = io_uring_wait_cqes(ring, &cqe, 1, 0, 0); 36 36 if (ret) { 37 37 fprintf(stderr, "wait_cqe failed: %d\n", ret); 38 - goto err; 38 + return 1; 39 39 } 40 40 ret = cqe->res; 41 41 io_uring_cqe_seen(ring, cqe); 42 42 return ret; 43 - err: 44 - return 1; 45 43 } 46 44 47 45 static int files_linked_ok(const char* fn1, const char *fn2) ··· 70 68 int main(int argc, char *argv[]) 71 69 { 72 70 static const char target[] = "io_uring-linkat-test-target"; 71 + static const char emptyname[] = "io_uring-linkat-test-empty"; 73 72 static const char linkname[] = "io_uring-linkat-test-link"; 74 - int ret; 73 + static const char symlinkname[] = "io_uring-linkat-test-symlink"; 75 74 struct io_uring ring; 75 + int ret, fd, exit_status = T_EXIT_FAIL; 76 76 77 77 if (argc > 1) 78 78 return T_EXIT_SKIP; ··· 83 83 return ret; 84 84 } 85 85 86 - ret = open(target, O_CREAT | O_RDWR | O_EXCL, 0600); 86 + ret = fd = open(target, O_CREAT | O_RDWR | O_EXCL, 0600); 87 87 if (ret < 0) { 88 88 perror("open"); 89 - goto err; 89 + goto out; 90 90 } 91 - if (write(ret, "linktest", 8) != 8) { 92 - close(ret); 93 - goto err1; 91 + if (write(fd, "linktest", 8) != 8) { 92 + 
close(fd); 93 + goto out; 94 94 } 95 - close(ret); 95 + if(geteuid()) { 96 + fprintf(stdout, "not root, skipping AT_EMPTY_PATH test\n"); 97 + } else { 98 + ret = do_linkat(&ring, fd, "", emptyname, AT_EMPTY_PATH); 99 + if (ret < 0) { 100 + if (ret == -EBADF || ret == -EINVAL) { 101 + fprintf(stdout, "linkat not supported, skipping\n"); 102 + exit_status = T_EXIT_SKIP; 103 + goto out; 104 + } 105 + fprintf(stderr, "linkat: %s\n", strerror(-ret)); 106 + goto out; 107 + } else if (ret) { 108 + goto out; 109 + } 110 + if (!files_linked_ok(emptyname, target)) 111 + goto out; 112 + unlinkat(AT_FDCWD, emptyname, 0); 113 + } 114 + close(fd); 96 115 97 - ret = do_linkat(&ring, target, linkname); 116 + ret = symlink(target, symlinkname); 117 + if (ret < 0) { 118 + perror("open"); 119 + goto out; 120 + } 121 + 122 + ret = do_linkat(&ring, AT_FDCWD, target, linkname, 0); 98 123 if (ret < 0) { 99 124 if (ret == -EBADF || ret == -EINVAL) { 100 125 fprintf(stdout, "linkat not supported, skipping\n"); 101 - goto skip; 126 + exit_status = T_EXIT_SKIP; 127 + goto out; 102 128 } 103 129 fprintf(stderr, "linkat: %s\n", strerror(-ret)); 104 - goto err1; 130 + goto out; 105 131 } else if (ret) { 106 - goto err1; 132 + goto out; 107 133 } 108 134 109 135 if (!files_linked_ok(linkname, target)) 110 - goto err2; 136 + goto out; 137 + 138 + unlinkat(AT_FDCWD, linkname, 0); 139 + 140 + ret = do_linkat(&ring, AT_FDCWD, symlinkname, linkname, AT_SYMLINK_FOLLOW); 141 + if (ret < 0) { 142 + fprintf(stderr, "linkat: %s\n", strerror(-ret)); 143 + goto out; 144 + } else if (ret) { 145 + goto out; 146 + } 111 147 112 - ret = do_linkat(&ring, target, linkname); 148 + if (!files_linked_ok(symlinkname, target)) 149 + goto out; 150 + 151 + ret = do_linkat(&ring, AT_FDCWD, target, linkname, 0); 113 152 if (ret != -EEXIST) { 114 153 fprintf(stderr, "test_linkat linkname already exists failed: %d\n", ret); 115 - goto err2; 154 + goto out; 116 155 } 117 156 118 - ret = do_linkat(&ring, target, 
"surely/this/does/not/exist"); 157 + ret = do_linkat(&ring, AT_FDCWD, target, "surely/this/does/not/exist", 0); 119 158 if (ret != -ENOENT) { 120 159 fprintf(stderr, "test_linkat no parent failed: %d\n", ret); 121 - goto err2; 160 + goto out; 122 161 } 123 - 162 + exit_status = T_EXIT_PASS; 163 + out: 164 + unlinkat(AT_FDCWD, symlinkname, 0); 124 165 unlinkat(AT_FDCWD, linkname, 0); 166 + unlinkat(AT_FDCWD, emptyname, 0); 125 167 unlinkat(AT_FDCWD, target, 0); 126 168 io_uring_queue_exit(&ring); 127 - return T_EXIT_PASS; 128 - skip: 129 - unlinkat(AT_FDCWD, linkname, 0); 130 - unlinkat(AT_FDCWD, target, 0); 131 - io_uring_queue_exit(&ring); 132 - return T_EXIT_SKIP; 133 - err2: 134 - unlinkat(AT_FDCWD, linkname, 0); 135 - err1: 136 - unlinkat(AT_FDCWD, target, 0); 137 - err: 138 - io_uring_queue_exit(&ring); 139 - return T_EXIT_FAIL; 169 + return exit_status; 140 170 }

+11 -13

vendor/liburing/test/helpers.c

··· 36 36 int t_bind_ephemeral_port(int fd, struct sockaddr_in *addr) 37 37 { 38 38 socklen_t addrlen; 39 + int ret; 39 40 40 41 addr->sin_port = 0; 41 42 if (bind(fd, (struct sockaddr *)addr, sizeof(*addr))) 42 43 return -errno; 43 44 44 45 addrlen = sizeof(*addr); 45 - assert(!getsockname(fd, (struct sockaddr *)addr, &addrlen)); 46 + ret = getsockname(fd, (struct sockaddr *)addr, &addrlen); 47 + assert(!ret); 46 48 assert(addr->sin_port != 0); 47 49 return 0; 48 50 } ··· 284 286 * Ensure kernel sees the SQE updates before the tail update. 285 287 */ 286 288 if (!(ring->flags & IORING_SETUP_SQPOLL)) 287 - IO_URING_WRITE_ONCE(*sq->ktail, tail); 289 + *sq->ktail = tail; 288 290 else 289 291 io_uring_smp_store_release(sq->ktail, tail); 290 292 } 291 293 /* 292 - * This _may_ look problematic, as we're not supposed to be reading 293 - * SQ->head without acquire semantics. When we're in SQPOLL mode, the 294 - * kernel submitter could be updating this right now. For non-SQPOLL, 295 - * task itself does it, and there's no potential race. But even for 296 - * SQPOLL, the load is going to be potentially out-of-date the very 297 - * instant it's done, regardless or whether or not it's done 298 - * atomically. Worst case, we're going to be over-estimating what 299 - * we can submit. The point is, we need to be able to deal with this 300 - * situation regardless of any perceived atomicity. 301 - */ 302 - return tail - *sq->khead; 294 + * This load needs to be atomic, since sq->khead is written concurrently 295 + * by the kernel, but it doesn't need to be load_acquire, since the 296 + * kernel doesn't store to the submission queue; it advances khead just 297 + * to indicate that it's finished reading the submission queue entries 298 + * so they're available for us to write to. 299 + */ 300 + return tail - IO_URING_READ_ONCE(*sq->khead); 303 301 } 304 302 305 303 /*

+10

vendor/liburing/test/helpers.h

··· 10 10 #endif 11 11 12 12 #include "liburing.h" 13 + #include "../src/setup.h" 13 14 #include <arpa/inet.h> 14 15 15 16 enum t_setup_ret { ··· 86 87 bool t_probe_defer_taskrun(void); 87 88 88 89 unsigned __io_uring_flush_sq(struct io_uring *ring); 90 + 91 + static inline int t_io_uring_init_sqarray(unsigned entries, struct io_uring *ring, 92 + struct io_uring_params *p) 93 + { 94 + int ret; 95 + 96 + ret = __io_uring_queue_init_params(entries, ring, p, NULL, 0); 97 + return ret >= 0 ? 0 : ret; 98 + } 89 99 90 100 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 91 101

+48

vendor/liburing/test/ignore-single-mmap.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * 6.10-rc merge window had a bug where the rewritten mmap support caused 4 + * rings allocated with > 1 page, but asking for smaller mappings, would 5 + * cause -EFAULT to be returned rather than a successful map. This hit 6 + * applications either using an ancient liburing with IORING_FEAT_SINGLE_MMAP 7 + * support, or application just ignoring that feature flag and still doing 8 + * 3 mmap operations to map the ring. 9 + */ 10 + #include <stdio.h> 11 + #include <stdlib.h> 12 + #include <unistd.h> 13 + 14 + #include "../src/syscall.h" 15 + #include "liburing.h" 16 + #include "helpers.h" 17 + 18 + #define ENTRIES 128 19 + 20 + int main(int argc, char *argv[]) 21 + { 22 + struct io_uring_params p = { }; 23 + void *ptr; 24 + int fd; 25 + 26 + if (argc > 1) 27 + return T_EXIT_SKIP; 28 + 29 + fd = __sys_io_uring_setup(ENTRIES, &p); 30 + if (fd < 0) 31 + return T_EXIT_SKIP; 32 + 33 + if (!(p.features & IORING_FEAT_SINGLE_MMAP)) { 34 + close(fd); 35 + return T_EXIT_SKIP; 36 + } 37 + 38 + ptr = __sys_mmap(0, ENTRIES * sizeof(unsigned), PROT_READ | PROT_WRITE, 39 + MAP_SHARED | MAP_POPULATE, fd, 40 + IORING_OFF_SQ_RING); 41 + if (!IS_ERR(ptr)) { 42 + close(fd); 43 + return T_EXIT_PASS; 44 + } 45 + 46 + fprintf(stderr, "ring sqe array mmap: %d\n", PTR_ERR(ptr)); 47 + return T_EXIT_FAIL; 48 + }

+164

vendor/liburing/test/init-mem.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: Check that io_uring_queue_init_mem() doesn't underestimate 4 + * the memory required for various size rings. 5 + */ 6 + #include <stdio.h> 7 + #include <unistd.h> 8 + #include <errno.h> 9 + #include <sys/mman.h> 10 + #include <linux/mman.h> 11 + #include <stdlib.h> 12 + #include <string.h> 13 + #include <netinet/udp.h> 14 + #include <arpa/inet.h> 15 + #include <net/if.h> 16 + #include <error.h> 17 + 18 + #include "liburing.h" 19 + #include "helpers.h" 20 + 21 + #define PRE_RED 0x5aa55aa55aa55aa5ULL 22 + #define POST_RED 0xa55aa55aa55aa55aULL 23 + 24 + struct ctx { 25 + struct io_uring ring; 26 + void *ring_mem; 27 + void *mem; 28 + unsigned long long *pre; 29 + unsigned long long *post; 30 + }; 31 + 32 + struct q_entries { 33 + unsigned int sqes; 34 + unsigned int cqes; 35 + }; 36 + 37 + static int setup_ctx(struct ctx *ctx, struct q_entries *q) 38 + { 39 + struct io_uring_params p = { }; 40 + int ret; 41 + 42 + if (posix_memalign(&ctx->mem, 4096, 2*1024*1024)) 43 + return T_EXIT_FAIL; 44 + 45 + ctx->pre = ctx->mem + 4096 - sizeof(unsigned long); 46 + *ctx->pre = PRE_RED; 47 + 48 + ctx->ring_mem = ctx->mem + 4096; 49 + p.flags |= IORING_SETUP_CQSIZE | IORING_SETUP_NO_SQARRAY; 50 + p.sq_entries = q->sqes; 51 + p.cq_entries = q->cqes; 52 + 53 + ret = io_uring_queue_init_mem(q->sqes, &ctx->ring, &p, 54 + ctx->ring_mem, 2*1024*1024); 55 + 56 + if (ret < 0) { 57 + if (ret == -EINVAL) 58 + return T_EXIT_SKIP; 59 + fprintf(stderr, "queue init: %d\n", ret); 60 + return T_EXIT_FAIL; 61 + } 62 + 63 + ctx->post = ctx->ring_mem + ret; 64 + *ctx->post = POST_RED; 65 + return 0; 66 + } 67 + 68 + static void clean_ctx(struct ctx *ctx) 69 + { 70 + io_uring_queue_exit(&ctx->ring); 71 + } 72 + 73 + static int check_red(struct ctx *ctx, unsigned long i) 74 + { 75 + int fail = 0; 76 + 77 + if (*ctx->pre != PRE_RED) { 78 + printf("pre redzone=%llx at i=%lu\n", *ctx->pre, i); 79 + fail = 1; 80 + } 81 + if (*ctx->post 
!= POST_RED) { 82 + printf("post redzone=%llx at i=%lu\n", *ctx->post, i); 83 + fail = 1; 84 + } 85 + return fail; 86 + } 87 + 88 + static int test(struct q_entries *q) 89 + { 90 + struct io_uring_sqe *sqe; 91 + struct io_uring_cqe *cqe; 92 + struct ctx ctx = { }; 93 + unsigned long i, ud; 94 + int j, ret, batch; 95 + 96 + ret = setup_ctx(&ctx, q); 97 + if (ret == T_EXIT_SKIP) 98 + return T_EXIT_SKIP; 99 + else if (ret != T_EXIT_PASS) 100 + return ret; 101 + 102 + batch = 64; 103 + if (batch > q->sqes) 104 + batch = q->sqes; 105 + 106 + i = ud = 0; 107 + while (i < q->cqes * 2) { 108 + if (check_red(&ctx, i)) 109 + return T_EXIT_FAIL; 110 + for (j = 0; j < batch; j++) { 111 + sqe = io_uring_get_sqe(&ctx.ring); 112 + io_uring_prep_nop(sqe); 113 + sqe->user_data = j + (unsigned long) i; 114 + } 115 + io_uring_submit(&ctx.ring); 116 + for (j = 0; j < batch; j++) { 117 + ret = io_uring_wait_cqe(&ctx.ring, &cqe); 118 + if (ret) 119 + goto err; 120 + if (cqe->user_data != ud) { 121 + fprintf(stderr, "ud=%lu, wanted %lu\n", (unsigned long) cqe->user_data, ud); 122 + goto err; 123 + } 124 + ud++; 125 + io_uring_cqe_seen(&ctx.ring, cqe); 126 + } 127 + i += batch; 128 + } 129 + 130 + clean_ctx(&ctx); 131 + return T_EXIT_PASS; 132 + err: 133 + clean_ctx(&ctx); 134 + return T_EXIT_FAIL; 135 + } 136 + 137 + int main(int argc, char *argv[]) 138 + { 139 + struct q_entries q_entries[] = { 140 + { 256, 16384 }, 141 + { 32, 4096 }, 142 + { 128, 8192 }, 143 + { 4096, 32768 }, 144 + { 1, 8 }, 145 + { 2, 1024 }, 146 + }; 147 + int i, ret; 148 + 149 + if (argc > 1) 150 + return T_EXIT_SKIP; 151 + 152 + for (i = 0; i < ARRAY_SIZE(q_entries); i++) { 153 + ret = test(&q_entries[i]); 154 + if (ret == T_EXIT_SKIP) { 155 + return T_EXIT_SKIP; 156 + } else if (ret != T_EXIT_PASS) { 157 + fprintf(stderr, "Failed at %d/%d\n", q_entries[i].sqes, 158 + q_entries[i].cqes); 159 + return T_EXIT_FAIL; 160 + } 161 + } 162 + 163 + return T_EXIT_PASS; 164 + }

+12 -8

vendor/liburing/test/io-cancel.c

··· 93 93 if (do_partial && cqe->user_data) { 94 94 if (!(cqe->user_data & 1)) { 95 95 if (cqe->res != BS) { 96 - fprintf(stderr, "IO %d wasn't cancelled but got error %d\n", (unsigned) cqe->user_data, cqe->res); 96 + fprintf(stderr, "IO %d wasn't canceled but got error %d\n", (unsigned) cqe->user_data, cqe->res); 97 97 goto err; 98 98 } 99 99 } ··· 147 147 148 148 /* 149 149 * Test cancels. If 'do_partial' is set, then we only attempt to cancel half of 150 - * the submitted IO. This is done to verify that cancelling one piece of IO doesn't 150 + * the submitted IO. This is done to verify that canceling one piece of IO doesn't 151 151 * impact others. 152 152 */ 153 153 static int test_io_cancel(const char *file, int do_write, int do_partial, ··· 271 271 272 272 ret = io_uring_wait_cqe_timeout(&ring1, &cqe, &ts); 273 273 if (ret != -ETIME) { 274 - fprintf(stderr, "read got cancelled or wait failed\n"); 274 + fprintf(stderr, "read got canceled or wait failed\n"); 275 275 return 1; 276 276 } 277 277 io_uring_cqe_seen(&ring1, cqe); ··· 347 347 case 1: 348 348 if (cqe->res != -EINTR && 349 349 cqe->res != -ECANCELED) { 350 - fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res); 350 + fprintf(stderr, "user_data %i res %i\n", 351 + (unsigned)cqe->user_data, cqe->res); 351 352 exit(1); 352 353 } 353 354 break; 354 355 case 2: 355 356 if (cqe->res != -EALREADY && cqe->res) { 356 - fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res); 357 + fprintf(stderr, "user_data %i res %i\n", 358 + (unsigned)cqe->user_data, cqe->res); 357 359 exit(1); 358 360 } 359 361 break; 360 362 default: 361 - fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res); 363 + fprintf(stderr, "user_data %i res %i\n", 364 + (unsigned)cqe->user_data, cqe->res); 362 365 exit(1); 363 366 } 364 367 ··· 451 454 if ((cqe->user_data == 1 && cqe->res != -ECANCELED) || 452 455 (cqe->user_data == 2 && cqe->res != -ECANCELED) || 453 456 (cqe->user_data == 3 && cqe->res != -ETIME)) { 454 - 
fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res); 457 + fprintf(stderr, "user_data %i res %i\n", 458 + (unsigned)cqe->user_data, cqe->res); 455 459 return 1; 456 460 } 457 461 io_uring_cqe_seen(&ring, cqe); ··· 497 501 sleep(1); 498 502 io_uring_queue_exit(&ring); 499 503 500 - /* close the write end, so if ring is cancelled properly read() fails*/ 504 + /* close the write end, so if ring is canceled properly read() fails*/ 501 505 close(fds[1]); 502 506 ret = read(fds[0], buffer, 10); 503 507 close(fds[0]);

+7 -4

vendor/liburing/test/io_uring_enter.c

··· 169 169 ret = io_uring_submit(ring); 170 170 unlink(template); 171 171 if (ret < 0) { 172 - perror("io_uring_enter"); 172 + fprintf(stderr, "io_uring_queue_enter: %s\n", strerror(-ret)); 173 173 exit(1); 174 174 } 175 175 } ··· 183 183 unsigned ktail, mask, index; 184 184 unsigned sq_entries; 185 185 unsigned completed, dropped; 186 + struct io_uring_params p; 186 187 187 188 if (argc > 1) 188 189 return T_EXIT_SKIP; 189 190 190 - ret = io_uring_queue_init(IORING_MAX_ENTRIES, &ring, 0); 191 + memset(&p, 0, sizeof(p)); 192 + ret = t_io_uring_init_sqarray(IORING_MAX_ENTRIES, &ring, &p); 191 193 if (ret == -ENOMEM) 192 - ret = io_uring_queue_init(IORING_MAX_ENTRIES_FALLBACK, &ring, 0); 194 + ret = t_io_uring_init_sqarray(IORING_MAX_ENTRIES_FALLBACK, 195 + &ring, &p); 193 196 if (ret < 0) { 194 - perror("io_uring_queue_init"); 197 + fprintf(stderr, "queue_init: %s\n", strerror(-ret)); 195 198 exit(T_EXIT_FAIL); 196 199 } 197 200 mask = sq->ring_mask;

+24 -27

vendor/liburing/test/io_uring_register.c

··· 32 32 static int devnull; 33 33 34 34 static int expect_fail(int fd, unsigned int opcode, void *arg, 35 - unsigned int nr_args, int error) 35 + unsigned int nr_args, int error, int error2) 36 36 { 37 37 int ret; 38 38 ··· 55 55 return 1; 56 56 } 57 57 58 - if (ret != error) { 59 - fprintf(stderr, "expected %d, got %d\n", error, ret); 58 + if (ret != error && (error2 && ret != error2)) { 59 + fprintf(stderr, "expected %d/%d, got %d\n", error, error2, ret); 60 60 return 1; 61 61 } 62 62 return 0; ··· 195 195 status = 0; 196 196 ret = io_uring_register(uring_fd, IORING_UNREGISTER_FILES, 0, 0); 197 197 if (ret < 0) { 198 - ret = errno; 199 - errno = ret; 198 + errno = -ret; 200 199 perror("io_uring_register UNREGISTER_FILES"); 201 200 exit(1); 202 201 } ··· 230 229 231 230 while (iov.iov_len) { 232 231 ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1); 233 - if (ret < 0) { 234 - if (errno == ENOMEM) { 235 - iov.iov_len /= 2; 236 - continue; 237 - } 238 - if (errno == EFAULT) { 239 - free(buf); 240 - return 0; 241 - } 242 - fprintf(stderr, "expected success or EFAULT, got %d\n", errno); 232 + if (ret == -ENOMEM) { 233 + iov.iov_len /= 2; 234 + continue; 235 + } else if (ret == -EFAULT) { 236 + free(buf); 237 + return 0; 238 + } else if (ret) { 239 + fprintf(stderr, "expected success or EFAULT, got %d\n", ret); 243 240 free(buf); 244 241 return 1; 245 242 } 246 243 ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0); 247 244 if (ret != 0) { 248 - fprintf(stderr, "error: unregister failed with %d\n", errno); 245 + fprintf(stderr, "error: unregister failed with %d\n", ret); 249 246 free(buf); 250 247 return 1; 251 248 } ··· 277 274 iovs[i].iov_len = pagesize; 278 275 } 279 276 280 - status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, -EINVAL); 277 + status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, -EINVAL, 0); 281 278 282 279 /* reduce to UIO_MAXIOV */ 283 280 nr = UIO_MAXIOV; 284 281 ret = io_uring_register(fd, 
IORING_REGISTER_BUFFERS, iovs, nr); 285 - if (ret && (errno == ENOMEM || errno == EPERM) && geteuid()) { 282 + if ((ret == -ENOMEM || ret == -EPERM) && geteuid()) { 283 fprintf(stderr, "can't register large iovec for regular users, skip\n"); 284 } else if (ret != 0) { 288 - fprintf(stderr, "expected success, got %d\n", errno); 285 + fprintf(stderr, "expected success, got %d\n", ret); 289 286 status = 1; 290 287 } else { 291 288 io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0); ··· 308 305 /* NULL pointer for base */ 309 306 iov.iov_base = 0; 310 307 iov.iov_len = 4096; 311 - status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT); 308 + status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0); 312 309 313 310 /* valid base, 0 length */ 314 311 iov.iov_base = &buf; 315 312 iov.iov_len = 0; 316 - status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT); 313 + status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0); 317 314 318 315 /* valid base, length exceeds size */ 319 316 /* this requires an unmapped page directly after buf */ ··· 324 321 assert(ret == 0); 325 322 iov.iov_base = buf; 326 323 iov.iov_len = 2 * pagesize; 327 - status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT); 324 + status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0); 328 325 munmap(buf, pagesize); 329 326 330 327 /* huge page */ ··· 372 369 status = 1; 373 370 iov.iov_base = buf; 374 371 iov.iov_len = 2*1024*1024; 375 - status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EOPNOTSUPP); 372 + status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, -EOPNOTSUPP); 376 373 munmap(buf, 2*1024*1024); 377 374 378 375 /* bump up against the soft limit and make sure we get EFAULT ··· 442 439 * fail, because the kernel does not allow registering of the 443 440 * ring_fd. 
444 441 */ 445 - status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, -EBADF); 442 + status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, -EBADF, 0); 446 443 447 444 /* tear down queue */ 448 445 io_uring_queue_exit(&ring); ··· 475 472 } 476 473 477 474 /* invalid fd */ 478 - status |= expect_fail(-1, 0, NULL, 0, -EBADF); 475 + status |= expect_fail(-1, 0, NULL, 0, -EBADF, 0); 479 476 /* valid fd that is not an io_uring fd */ 480 - status |= expect_fail(devnull, 0, NULL, 0, -EOPNOTSUPP); 477 + status |= expect_fail(devnull, 0, NULL, 0, -EOPNOTSUPP, 0); 481 478 482 479 /* invalid opcode */ 483 480 memset(&p, 0, sizeof(p)); 484 481 fd = new_io_uring(1, &p); 485 - ret = expect_fail(fd, ~0U, NULL, 0, -EINVAL); 482 + ret = expect_fail(fd, ~0U, NULL, 0, -EINVAL, 0); 486 483 if (ret) { 487 484 /* if this succeeds, tear down the io_uring instance 488 485 * and start clean for the next test. */

+1 -1

vendor/liburing/test/io_uring_setup.c

··· 17 17 #include "liburing.h" 18 18 #include "helpers.h" 19 19 20 - #include "../syscall.h" 20 + #include "../src/syscall.h" 21 21 22 22 /* bogus: setup returns a valid fd on success... expect can't predict the 23 23 fd we'll get, so this really only takes 1 parameter: error */

+331

vendor/liburing/test/msg-ring-fd.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: test fd passing with MSG_RING 4 + * 5 + */ 6 + #include <errno.h> 7 + #include <stdio.h> 8 + #include <unistd.h> 9 + #include <stdlib.h> 10 + #include <string.h> 11 + #include <fcntl.h> 12 + #include <pthread.h> 13 + 14 + #include "liburing.h" 15 + #include "helpers.h" 16 + 17 + static int no_msg; 18 + static int no_sparse; 19 + static int no_fd_pass; 20 + 21 + struct data { 22 + pthread_t thread; 23 + pthread_barrier_t barrier; 24 + int ring_flags; 25 + int ring_fd; 26 + char buf[32]; 27 + }; 28 + 29 + static void *thread_fn(void *__data) 30 + { 31 + struct io_uring_sqe *sqe; 32 + struct io_uring_cqe *cqe; 33 + struct data *d = __data; 34 + struct io_uring ring; 35 + int ret, fd = -1; 36 + 37 + io_uring_queue_init(8, &ring, d->ring_flags); 38 + ret = io_uring_register_files(&ring, &fd, 1); 39 + if (ret) { 40 + if (ret != -EINVAL && ret != -EBADF) 41 + fprintf(stderr, "thread file register: %d\n", ret); 42 + no_sparse = 1; 43 + pthread_barrier_wait(&d->barrier); 44 + return NULL; 45 + } 46 + 47 + d->ring_fd = ring.ring_fd; 48 + pthread_barrier_wait(&d->barrier); 49 + 50 + /* wait for MSG */ 51 + ret = io_uring_wait_cqe(&ring, &cqe); 52 + if (ret) { 53 + fprintf(stderr, "wait_cqe dst: %d\n", ret); 54 + return NULL; 55 + } 56 + if (cqe->res < 0) { 57 + fprintf(stderr, "cqe error dst: %d\n", cqe->res); 58 + return NULL; 59 + } 60 + 61 + fd = cqe->res; 62 + io_uring_cqe_seen(&ring, cqe); 63 + sqe = io_uring_get_sqe(&ring); 64 + io_uring_prep_read(sqe, fd, d->buf, sizeof(d->buf), 0); 65 + sqe->flags |= IOSQE_FIXED_FILE; 66 + io_uring_submit(&ring); 67 + 68 + ret = io_uring_wait_cqe(&ring, &cqe); 69 + if (ret) { 70 + fprintf(stderr, "wait_cqe dst: %d\n", ret); 71 + return NULL; 72 + } 73 + if (cqe->res < 0) { 74 + fprintf(stderr, "cqe error dst: %d\n", cqe->res); 75 + return NULL; 76 + } 77 + 78 + io_uring_queue_exit(&ring); 79 + return NULL; 80 + } 81 + 82 + static int test_remote(struct io_uring *src, 
int ring_flags) 83 + { 84 + struct io_uring_sqe *sqe; 85 + struct io_uring_cqe *cqe; 86 + int fds[2], fd, ret; 87 + struct data d; 88 + char buf[32]; 89 + void *tret; 90 + int i; 91 + 92 + if (no_fd_pass) 93 + return 0; 94 + 95 + pthread_barrier_init(&d.barrier, NULL, 2); 96 + d.ring_flags = ring_flags; 97 + pthread_create(&d.thread, NULL, thread_fn, &d); 98 + pthread_barrier_wait(&d.barrier); 99 + memset(d.buf, 0, sizeof(d.buf)); 100 + 101 + if (no_sparse) 102 + return 0; 103 + 104 + if (pipe(fds) < 0) { 105 + perror("pipe"); 106 + return 1; 107 + } 108 + 109 + fd = fds[0]; 110 + ret = io_uring_register_files(src, &fd, 1); 111 + if (ret) { 112 + fprintf(stderr, "register files failed: %d\n", ret); 113 + return 1; 114 + } 115 + 116 + for (i = 0; i < ARRAY_SIZE(buf); i++) 117 + buf[i] = rand(); 118 + 119 + sqe = io_uring_get_sqe(src); 120 + io_uring_prep_write(sqe, fds[1], buf, sizeof(buf), 0); 121 + sqe->user_data = 1; 122 + 123 + sqe = io_uring_get_sqe(src); 124 + io_uring_prep_msg_ring_fd(sqe, d.ring_fd, 0, 0, 0, 0); 125 + sqe->user_data = 2; 126 + 127 + io_uring_submit(src); 128 + 129 + for (i = 0; i < 2; i++) { 130 + ret = io_uring_wait_cqe(src, &cqe); 131 + if (ret) { 132 + fprintf(stderr, "wait_cqe: %d\n", ret); 133 + return 1; 134 + } 135 + if (cqe->res < 0) { 136 + fprintf(stderr, "cqe res %d\n", cqe->res); 137 + return 1; 138 + } 139 + if (cqe->user_data == 1 && cqe->res != sizeof(buf)) { 140 + fprintf(stderr, "short write %d\n", cqe->res); 141 + return 1; 142 + } 143 + io_uring_cqe_seen(src, cqe); 144 + } 145 + 146 + pthread_join(d.thread, &tret); 147 + 148 + if (memcmp(buf, d.buf, sizeof(buf))) { 149 + fprintf(stderr, "buffers differ\n"); 150 + return 1; 151 + } 152 + 153 + close(fds[0]); 154 + close(fds[1]); 155 + io_uring_unregister_files(src); 156 + return 0; 157 + } 158 + 159 + static int test_local(struct io_uring *src, struct io_uring *dst) 160 + { 161 + struct io_uring_sqe *sqe; 162 + struct io_uring_cqe *cqe; 163 + int fds[2], fd, ret; 164 + char 
buf[32], dst_buf[32]; 165 + int i; 166 + 167 + if (no_fd_pass) 168 + return 0; 169 + 170 + fd = -1; 171 + ret = io_uring_register_files(dst, &fd, 1); 172 + if (ret) { 173 + if (ret == -EBADF || ret == -EINVAL) 174 + return 0; 175 + fprintf(stderr, "register files failed: %d\n", ret); 176 + return 1; 177 + } 178 + 179 + if (pipe(fds) < 0) { 180 + perror("pipe"); 181 + return 1; 182 + } 183 + 184 + fd = fds[0]; 185 + ret = io_uring_register_files(src, &fd, 1); 186 + if (ret) { 187 + fprintf(stderr, "register files failed: %d\n", ret); 188 + return 1; 189 + } 190 + 191 + memset(dst_buf, 0, sizeof(dst_buf)); 192 + for (i = 0; i < ARRAY_SIZE(buf); i++) 193 + buf[i] = rand(); 194 + 195 + sqe = io_uring_get_sqe(src); 196 + io_uring_prep_write(sqe, fds[1], buf, sizeof(buf), 0); 197 + sqe->user_data = 1; 198 + 199 + sqe = io_uring_get_sqe(src); 200 + io_uring_prep_msg_ring_fd(sqe, dst->ring_fd, 0, 0, 10, 0); 201 + sqe->user_data = 2; 202 + 203 + io_uring_submit(src); 204 + 205 + fd = -1; 206 + for (i = 0; i < 2; i++) { 207 + ret = io_uring_wait_cqe(src, &cqe); 208 + if (ret) { 209 + fprintf(stderr, "wait_cqe: %d\n", ret); 210 + return 1; 211 + } 212 + if (cqe->user_data == 2 && cqe->res == -EINVAL) { 213 + no_fd_pass = 1; 214 + } else if (cqe->res < 0) { 215 + fprintf(stderr, "cqe res %d\n", cqe->res); 216 + return 1; 217 + } 218 + if (cqe->user_data == 1 && cqe->res != sizeof(buf)) { 219 + fprintf(stderr, "short write %d\n", cqe->res); 220 + return 1; 221 + } 222 + io_uring_cqe_seen(src, cqe); 223 + } 224 + 225 + if (no_fd_pass) 226 + goto out; 227 + 228 + ret = io_uring_wait_cqe(dst, &cqe); 229 + if (ret) { 230 + fprintf(stderr, "wait_cqe dst: %d\n", ret); 231 + return 1; 232 + } 233 + if (cqe->res < 0) { 234 + fprintf(stderr, "cqe error dst: %d\n", cqe->res); 235 + return 1; 236 + } 237 + 238 + fd = cqe->res; 239 + io_uring_cqe_seen(dst, cqe); 240 + sqe = io_uring_get_sqe(dst); 241 + io_uring_prep_read(sqe, fd, dst_buf, sizeof(dst_buf), 0); 242 + sqe->flags |= 
IOSQE_FIXED_FILE; 243 + sqe->user_data = 3; 244 + io_uring_submit(dst); 245 + 246 + ret = io_uring_wait_cqe(dst, &cqe); 247 + if (ret) { 248 + fprintf(stderr, "wait_cqe dst: %d\n", ret); 249 + return 1; 250 + } 251 + if (cqe->res < 0) { 252 + fprintf(stderr, "cqe error dst: %d\n", cqe->res); 253 + return 1; 254 + } 255 + if (cqe->res != sizeof(dst_buf)) { 256 + fprintf(stderr, "short read %d\n", cqe->res); 257 + return 1; 258 + } 259 + if (memcmp(buf, dst_buf, sizeof(buf))) { 260 + fprintf(stderr, "buffers differ\n"); 261 + return 1; 262 + } 263 + 264 + out: 265 + close(fds[0]); 266 + close(fds[1]); 267 + io_uring_unregister_files(src); 268 + io_uring_unregister_files(dst); 269 + return 0; 270 + } 271 + 272 + static int test(int ring_flags) 273 + { 274 + struct io_uring ring, ring2; 275 + int ret; 276 + 277 + ret = io_uring_queue_init(8, &ring, ring_flags); 278 + if (ret) { 279 + if (ret == -EINVAL) 280 + return 0; 281 + fprintf(stderr, "ring setup failed: %d\n", ret); 282 + return T_EXIT_FAIL; 283 + } 284 + ret = io_uring_queue_init(8, &ring2, ring_flags); 285 + if (ret) { 286 + fprintf(stderr, "ring setup failed: %d\n", ret); 287 + return T_EXIT_FAIL; 288 + } 289 + 290 + ret = test_local(&ring, &ring2); 291 + if (ret) { 292 + fprintf(stderr, "test local failed\n"); 293 + return T_EXIT_FAIL; 294 + } 295 + if (no_msg) 296 + return T_EXIT_SKIP; 297 + 298 + ret = test_remote(&ring, ring_flags); 299 + if (ret) { 300 + fprintf(stderr, "test_remote failed\n"); 301 + return T_EXIT_FAIL; 302 + } 303 + 304 + io_uring_queue_exit(&ring); 305 + io_uring_queue_exit(&ring2); 306 + return T_EXIT_PASS; 307 + } 308 + 309 + int main(int argc, char *argv[]) 310 + { 311 + int ret; 312 + 313 + if (argc > 1) 314 + return T_EXIT_SKIP; 315 + 316 + ret = test(0); 317 + if (ret != T_EXIT_PASS) { 318 + fprintf(stderr, "ring flags 0 failed\n"); 319 + return ret; 320 + } 321 + if (no_msg) 322 + return T_EXIT_SKIP; 323 + 324 + ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN); 
325 + if (ret != T_EXIT_PASS) { 326 + fprintf(stderr, "ring flags defer failed\n"); 327 + return ret; 328 + } 329 + 330 + return ret; 331 + }

+26 -6

vendor/liburing/test/msg-ring-flags.c

··· 117 117 return NULL; 118 118 } 119 119 120 - int main(int argc, char *argv[]) 120 + static int test(int ring_flags) 121 121 { 122 122 struct io_uring ring, ring2; 123 123 pthread_t thread; ··· 125 125 void *ret2; 126 126 int ret, i; 127 127 128 - if (argc > 1) 129 - return T_EXIT_SKIP; 130 - 131 - ret = io_uring_queue_init(2, &ring, 0); 128 + ret = io_uring_queue_init(2, &ring, ring_flags); 132 129 if (ret) { 133 130 fprintf(stderr, "io_uring_queue_init failed for ring1: %d\n", ret); 134 131 return T_EXIT_FAIL; 135 132 } 136 133 137 - ret = io_uring_queue_init(2, &ring2, 0); 134 + ret = io_uring_queue_init(2, &ring2, ring_flags); 138 135 if (ret) { 139 136 fprintf(stderr, "io_uring_queue_init failed for ring2: %d\n", ret); 140 137 return T_EXIT_FAIL; ··· 190 187 191 188 return T_EXIT_PASS; 192 189 } 190 + 191 + int main(int argc, char *argv[]) 192 + { 193 + int ret; 194 + 195 + if (argc > 1) 196 + return T_EXIT_SKIP; 197 + 198 + ret = test(0); 199 + if (ret == T_EXIT_FAIL) { 200 + fprintf(stderr, "test ring_flags 0 failed\n"); 201 + return ret; 202 + } else if (ret == T_EXIT_SKIP) 203 + return ret; 204 + 205 + ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN); 206 + if (ret == T_EXIT_FAIL) { 207 + fprintf(stderr, "test ring_flags defer failed\n"); 208 + return ret; 209 + } 210 + 211 + return ret; 212 + }

+62 -15

vendor/liburing/test/msg-ring.c

··· 72 72 return 1; 73 73 } 74 74 75 - static void *wait_cqe_fn(void *data) 75 + struct data { 76 + struct io_uring *ring; 77 + unsigned int flags; 78 + pthread_barrier_t startup; 79 + pthread_barrier_t barrier; 80 + }; 81 + 82 + static void *wait_cqe_fn(void *__data) 76 83 { 77 - struct io_uring *ring = data; 84 + struct data *d = __data; 78 85 struct io_uring_cqe *cqe; 86 + struct io_uring ring; 79 87 int ret; 80 88 81 - ret = io_uring_wait_cqe(ring, &cqe); 89 + io_uring_queue_init(4, &ring, d->flags); 90 + d->ring = &ring; 91 + pthread_barrier_wait(&d->startup); 92 + 93 + pthread_barrier_wait(&d->barrier); 94 + 95 + ret = io_uring_wait_cqe(&ring, &cqe); 82 96 if (ret) { 83 97 fprintf(stderr, "wait cqe %d\n", ret); 84 98 goto err; ··· 93 107 goto err; 94 108 } 95 109 96 - io_uring_cqe_seen(ring, cqe); 110 + io_uring_cqe_seen(&ring, cqe); 111 + io_uring_queue_exit(&ring); 97 112 return NULL; 98 113 err: 99 - io_uring_cqe_seen(ring, cqe); 114 + io_uring_cqe_seen(&ring, cqe); 115 + io_uring_queue_exit(&ring); 100 116 return (void *) (unsigned long) 1; 101 117 } 102 118 103 - static int test_remote(struct io_uring *ring, struct io_uring *target) 119 + static int test_remote(struct io_uring *ring, unsigned int ring_flags) 104 120 { 121 + struct io_uring *target; 105 122 pthread_t thread; 106 123 void *tret; 107 124 struct io_uring_cqe *cqe; 108 125 struct io_uring_sqe *sqe; 126 + struct data d; 109 127 int ret; 110 128 111 - pthread_create(&thread, NULL, wait_cqe_fn, target); 129 + d.flags = ring_flags; 130 + pthread_barrier_init(&d.barrier, NULL, 2); 131 + pthread_barrier_init(&d.startup, NULL, 2); 132 + pthread_create(&thread, NULL, wait_cqe_fn, &d); 133 + 134 + pthread_barrier_wait(&d.startup); 135 + target = d.ring; 112 136 113 137 sqe = io_uring_get_sqe(ring); 114 138 if (!sqe) { ··· 125 149 goto err; 126 150 } 127 151 152 + pthread_barrier_wait(&d.barrier); 153 + 128 154 ret = io_uring_wait_cqe(ring, &cqe); 129 155 if (ret < 0) { 130 156 fprintf(stderr, "wait 
completion %d\n", ret); ··· 132 158 } 133 159 if (cqe->res != 0) { 134 160 fprintf(stderr, "cqe res %d\n", cqe->res); 161 + io_uring_cqe_seen(ring, cqe); 135 162 return -1; 136 163 } 137 164 if (cqe->user_data != 1) { 138 165 fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data); 166 + io_uring_cqe_seen(ring, cqe); 139 167 return -1; 140 168 } 141 169 ··· 314 342 return 0; 315 343 } 316 344 317 - int main(int argc, char *argv[]) 345 + static int test(int ring_flags) 318 346 { 319 347 struct io_uring ring, ring2, pring; 320 348 int ret, i; 321 349 322 - if (argc > 1) 323 - return T_EXIT_SKIP; 324 - 325 - ret = io_uring_queue_init(8, &ring, 0); 350 + ret = io_uring_queue_init(8, &ring, ring_flags); 326 351 if (ret) { 327 352 fprintf(stderr, "ring setup failed: %d\n", ret); 328 353 return T_EXIT_FAIL; 329 354 } 330 - ret = io_uring_queue_init(8, &ring2, 0); 355 + ret = io_uring_queue_init(8, &ring2, ring_flags); 331 356 if (ret) { 332 357 fprintf(stderr, "ring setup failed: %d\n", ret); 333 358 return T_EXIT_FAIL; 334 359 } 335 - ret = io_uring_queue_init(8, &pring, IORING_SETUP_IOPOLL); 360 + ret = io_uring_queue_init(8, &pring, ring_flags | IORING_SETUP_IOPOLL); 336 361 if (ret) { 337 362 fprintf(stderr, "ring setup failed: %d\n", ret); 338 363 return T_EXIT_FAIL; ··· 365 390 } 366 391 } 367 392 368 - ret = test_remote(&ring, &ring2); 393 + ret = test_remote(&ring, ring_flags); 369 394 if (ret) { 370 395 fprintf(stderr, "test_remote failed\n"); 371 396 return T_EXIT_FAIL; ··· 418 443 io_uring_queue_exit(&ring2); 419 444 return T_EXIT_PASS; 420 445 } 446 + 447 + int main(int argc, char *argv[]) 448 + { 449 + int ret; 450 + 451 + if (argc > 1) 452 + return T_EXIT_SKIP; 453 + 454 + ret = test(0); 455 + if (ret != T_EXIT_PASS) { 456 + fprintf(stderr, "ring flags 0 failed\n"); 457 + return ret; 458 + } 459 + 460 + ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN); 461 + if (ret != T_EXIT_PASS) { 462 + fprintf(stderr, "ring flags defer 
failed\n"); 463 + return ret; 464 + } 465 + 466 + return ret; 467 + }

+6 -6

vendor/liburing/test/multicqes_drain.c

··· 38 38 * sqe_flags: combination of sqe flags 39 39 * multi_sqes: record the user_data/index of all the multishot sqes 40 40 * cnt: how many entries there are in multi_sqes 41 - * we can leverage multi_sqes array for cancellation: we randomly pick 42 - * up an entry in multi_sqes when form a cancellation sqe. 41 + * we can leverage multi_sqes array for cancelation: we randomly pick 42 + * up an entry in multi_sqes when form a cancelation sqe. 43 43 * multi_cap: limitation of number of multishot sqes 44 44 */ 45 45 static const unsigned sqe_flags[4] = { ··· 109 109 { 110 110 __u8 flags = 0; 111 111 /* 112 - * drain sqe must be put after multishot sqes cancelled 112 + * drain sqe must be put after multishot sqes canceled 113 113 */ 114 114 do { 115 115 flags = sqe_flags[rand() % 4]; ··· 124 124 /* 125 125 * avoid below case: 126 126 * sqe0(multishot, link)->sqe1(nop, link)->sqe2(nop)->sqe3(cancel_sqe0) 127 - * sqe3 may execute before sqe0 so that sqe0 isn't cancelled 127 + * sqe3 may execute before sqe0 so that sqe0 isn't canceled 128 128 */ 129 129 if (sqe_op == multi) 130 130 flags &= ~IOSQE_IO_LINK; ··· 233 233 } 234 234 235 235 sleep(1); 236 - // TODO: randomize event triggerring order 236 + // TODO: randomize event triggering order 237 237 for (i = 0; i < max_entry; i++) { 238 238 if (si[i].op != multi && si[i].op != single) 239 239 continue; ··· 265 265 } 266 266 } 267 267 /* 268 - * for multishot sqes, record them only when it is cancelled 268 + * for multishot sqes, record them only when it is canceled 269 269 */ 270 270 if ((si[index].op != multi) || (cqe_res[j] == -ECANCELED)) 271 271 compl_bits |= (1ULL << index);

+42

vendor/liburing/test/no-mmap-inval.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: test that using SETUP_NO_MMAP with an invalid SQ ring 4 + * address fails. 5 + * 6 + */ 7 + #include <stdlib.h> 8 + #include <sys/types.h> 9 + #include <stdio.h> 10 + #include <unistd.h> 11 + 12 + #include "liburing.h" 13 + #include "helpers.h" 14 + 15 + int main(int argc, char *argv[]) 16 + { 17 + struct io_uring_params p = { 18 + .sq_entries = 2, 19 + .cq_entries = 4, 20 + .flags = IORING_SETUP_NO_MMAP, 21 + }; 22 + struct io_uring ring; 23 + void *addr; 24 + int ret; 25 + 26 + if (argc > 1) 27 + return T_EXIT_SKIP; 28 + 29 + t_posix_memalign(&addr, sysconf(_SC_PAGESIZE), 8192); 30 + p.cq_off.user_addr = (unsigned long long) (uintptr_t) addr; 31 + 32 + ret = io_uring_queue_init_params(2, &ring, &p); 33 + if (ret == -EINVAL) { 34 + /* kernel doesn't support SETUP_NO_MMAP */ 35 + return T_EXIT_SKIP; 36 + } else if (ret && (ret != -EFAULT && ret != -ENOMEM)) { 37 + fprintf(stderr, "Got %d, wanted -EFAULT\n", ret); 38 + return T_EXIT_FAIL; 39 + } 40 + 41 + return T_EXIT_PASS; 42 + }

+5 -3

vendor/liburing/test/nolibc.c

··· 6 6 * 1) x86 7 7 * 2) x86-64 8 8 * 3) aarch64 9 + * 4) riscv64 9 10 * 10 11 */ 11 12 #include "helpers.h" 12 13 13 - #if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) 14 + #if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) && (!defined(__riscv) && __riscv_xlen != 64) 15 + 14 16 15 17 /* 16 18 * This arch doesn't support nolibc. ··· 20 22 return T_EXIT_SKIP; 21 23 } 22 24 23 - #else /* #if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) */ 25 + #else /* #if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) && (!defined(__riscv) && __riscv_xlen != 64) */ 24 26 25 27 #ifndef CONFIG_NOLIBC 26 28 #define CONFIG_NOLIBC ··· 57 59 return T_EXIT_PASS; 58 60 } 59 61 60 - #endif /* #if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) */ 62 + #endif /* #if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) && (!defined(__riscv) && __riscv_xlen != 64) */

+82

vendor/liburing/test/ooo-file-unreg.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: Test that out-of-order file updates with inflight requests 4 + * work as expected. 5 + * 6 + */ 7 + #include <stdio.h> 8 + #include <fcntl.h> 9 + #include <sys/socket.h> 10 + #include <unistd.h> 11 + #include <stdlib.h> 12 + #include <sys/poll.h> 13 + 14 + #include "liburing.h" 15 + #include "helpers.h" 16 + 17 + int main(int argc, char *argv[]) 18 + { 19 + struct io_uring_sqe *sqe; 20 + int res, fds[2], sockid; 21 + struct io_uring ring; 22 + 23 + if (argc > 1) 24 + return T_EXIT_SKIP; 25 + 26 + res = io_uring_queue_init(1, &ring, 0); 27 + if (res) { 28 + fprintf(stderr, "queue_init: %d\n", res); 29 + return T_EXIT_FAIL; 30 + } 31 + 32 + res = io_uring_register_files_sparse(&ring, 2); 33 + if (res) { 34 + if (res == -EINVAL) 35 + return T_EXIT_SKIP; 36 + fprintf(stderr, "sparse reg: %d\n", res); 37 + return T_EXIT_FAIL; 38 + } 39 + 40 + fds[0] = socket(AF_INET, SOCK_DGRAM, 0); 41 + if (fds[0] < 0) { 42 + perror("socket"); 43 + return T_EXIT_FAIL; 44 + } 45 + fds[1] = socket(AF_INET, SOCK_DGRAM, 0); 46 + if (fds[1] < 0) { 47 + perror("socket"); 48 + return T_EXIT_FAIL; 49 + } 50 + 51 + res = io_uring_register_files_update(&ring, 0, fds, 2); 52 + if (res != 2) { 53 + fprintf(stderr, "files updates; %d\n", res); 54 + return T_EXIT_FAIL; 55 + } 56 + 57 + sqe = io_uring_get_sqe(&ring); 58 + io_uring_prep_poll_add(sqe, 0, POLLIN); 59 + sqe->flags = IOSQE_FIXED_FILE; 60 + io_uring_submit(&ring); 61 + 62 + close(fds[0]); 63 + close(fds[1]); 64 + 65 + sockid = -1; 66 + res = io_uring_register_files_update(&ring, 1, &sockid, 1); 67 + if (res != 1) { 68 + fprintf(stderr, "files updates; %d\n", res); 69 + return T_EXIT_FAIL; 70 + } 71 + 72 + sockid = -1; 73 + res = io_uring_register_files_update(&ring, 0, &sockid, 1); 74 + if (res != 1) { 75 + fprintf(stderr, "files updates; %d\n", res); 76 + return T_EXIT_FAIL; 77 + } 78 + 79 + sleep(1); 80 + io_uring_queue_exit(&ring); 81 + return T_EXIT_PASS; 82 + }

+5 -1

vendor/liburing/test/openat2.c

··· 1 1 /* SPDX-License-Identifier: MIT */ 2 2 /* 3 - * Description: run various openat(2) tests 3 + * Description: run various openat2(2) tests 4 4 * 5 5 */ 6 6 #include <errno.h> ··· 72 72 } 73 73 ret = io_uring_register_files(&ring, &fd, 1); 74 74 if (ret) { 75 + if (ret == -EINVAL || ret == -EBADF) 76 + return 0; 75 77 fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); 76 78 return -1; 77 79 } ··· 141 143 142 144 ret = io_uring_register_files(&ring, &fd, 1); 143 145 if (ret) { 146 + if (ret == -EINVAL || ret == -EBADF) 147 + return 0; 144 148 fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); 145 149 return -1; 146 150 }

+29 -5

vendor/liburing/test/poll-cancel-all.c

··· 14 14 15 15 static int no_cancel_flags; 16 16 17 - static int test1(struct io_uring *ring, int *fd) 17 + static int test1(struct io_uring *ring, int *fd, int fixed) 18 18 { 19 19 struct io_uring_sqe *sqe; 20 20 struct io_uring_cqe *cqe; 21 - int ret, i; 21 + int ret, i, __fd = fd[0]; 22 + 23 + if (fixed) 24 + __fd = 0; 25 + 26 + if (fixed) { 27 + ret = io_uring_register_files(ring, fd, 1); 28 + if (ret) { 29 + fprintf(stderr, "failed file register %d\n", ret); 30 + return 1; 31 + } 32 + } 22 33 23 34 for (i = 0; i < 8; i++) { 24 35 sqe = io_uring_get_sqe(ring); ··· 27 38 return 1; 28 39 } 29 40 30 - io_uring_prep_poll_add(sqe, fd[0], POLLIN); 41 + io_uring_prep_poll_add(sqe, __fd, POLLIN); 31 42 sqe->user_data = i + 1; 43 + if (fixed) 44 + sqe->flags |= IOSQE_FIXED_FILE; 32 45 } 33 46 34 47 ret = io_uring_submit(ring); ··· 51 64 */ 52 65 io_uring_prep_cancel(sqe, 0, IORING_ASYNC_CANCEL_ALL); 53 66 sqe->cancel_flags |= IORING_ASYNC_CANCEL_FD; 54 - sqe->fd = fd[0]; 67 + if (fixed) 68 + sqe->cancel_flags |= IORING_ASYNC_CANCEL_FD_FIXED; 69 + sqe->fd = __fd; 55 70 sqe->user_data = 100; 56 71 57 72 ret = io_uring_submit(ring); ··· 92 107 } 93 108 io_uring_cqe_seen(ring, cqe); 94 109 } 110 + 111 + if (fixed) 112 + io_uring_unregister_files(ring); 95 113 96 114 return 0; 97 115 } ··· 442 460 return 1; 443 461 } 444 462 445 - ret = test1(&ring, fd); 463 + ret = test1(&ring, fd, 0); 446 464 if (ret) { 447 465 fprintf(stderr, "test1 failed\n"); 448 466 return ret; 449 467 } 450 468 if (no_cancel_flags) 451 469 return 0; 470 + 471 + ret = test1(&ring, fd, 1); 472 + if (ret) { 473 + fprintf(stderr, "test1 fixed failed\n"); 474 + return ret; 475 + } 452 476 453 477 ret = test2(&ring, fd); 454 478 if (ret) {

+2 -2

vendor/liburing/test/poll-cancel.c

··· 154 154 return 1; 155 155 } 156 156 157 - /* test timeout-offset triggering path during cancellation */ 157 + /* test timeout-offset triggering path during cancelation */ 158 158 sqe = io_uring_get_sqe(&ring); 159 159 io_uring_prep_timeout(sqe, &ts, off_nr, 0); 160 160 161 - /* poll ring2 to trigger cancellation on exit() */ 161 + /* poll ring2 to trigger cancelation on exit() */ 162 162 sqe = io_uring_get_sqe(&ring); 163 163 io_uring_prep_poll_add(sqe, ring2.ring_fd, POLLIN); 164 164 sqe->flags |= IOSQE_IO_LINK;

-426

vendor/liburing/test/pollfree.c

··· 1 - /* SPDX-License-Identifier: MIT */ 2 - // https://syzkaller.appspot.com/bug?id=5f5a44abb4cba056fe24255c4fcb7e7bbe13de7a 3 - // autogenerated by syzkaller (https://github.com/google/syzkaller) 4 - 5 - #include <dirent.h> 6 - #include <endian.h> 7 - #include <errno.h> 8 - #include <fcntl.h> 9 - #include <pthread.h> 10 - #include <signal.h> 11 - #include <stdarg.h> 12 - #include <stdbool.h> 13 - #include <stdint.h> 14 - #include <stdio.h> 15 - #include <stdlib.h> 16 - #include <string.h> 17 - #include <sys/mman.h> 18 - #include <sys/prctl.h> 19 - #include <sys/stat.h> 20 - #include <sys/syscall.h> 21 - #include <sys/types.h> 22 - #include <sys/wait.h> 23 - #include <time.h> 24 - #include <unistd.h> 25 - 26 - #include <linux/futex.h> 27 - 28 - #ifdef __NR_futex 29 - 30 - static void sleep_ms(uint64_t ms) 31 - { 32 - usleep(ms * 1000); 33 - } 34 - 35 - static uint64_t current_time_ms(void) 36 - { 37 - struct timespec ts; 38 - if (clock_gettime(CLOCK_MONOTONIC, &ts)) 39 - exit(1); 40 - return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; 41 - } 42 - 43 - static void thread_start(void* (*fn)(void*), void* arg) 44 - { 45 - pthread_t th; 46 - pthread_attr_t attr; 47 - pthread_attr_init(&attr); 48 - pthread_attr_setstacksize(&attr, 128 << 10); 49 - int i = 0; 50 - for (; i < 100; i++) { 51 - if (pthread_create(&th, &attr, fn, arg) == 0) { 52 - pthread_attr_destroy(&attr); 53 - return; 54 - } 55 - if (errno == EAGAIN) { 56 - usleep(50); 57 - continue; 58 - } 59 - break; 60 - } 61 - exit(1); 62 - } 63 - 64 - typedef struct { 65 - int state; 66 - } event_t; 67 - 68 - static void event_init(event_t* ev) 69 - { 70 - ev->state = 0; 71 - } 72 - 73 - static void event_reset(event_t* ev) 74 - { 75 - ev->state = 0; 76 - } 77 - 78 - static void event_set(event_t* ev) 79 - { 80 - if (ev->state) 81 - exit(1); 82 - __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE); 83 - syscall(__NR_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000); 84 - } 85 - 86 - 
static void event_wait(event_t* ev) 87 - { 88 - while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE)) 89 - syscall(__NR_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0); 90 - } 91 - 92 - static int event_isset(event_t* ev) 93 - { 94 - return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE); 95 - } 96 - 97 - static int event_timedwait(event_t* ev, uint64_t timeout) 98 - { 99 - uint64_t start = current_time_ms(); 100 - uint64_t now = start; 101 - for (;;) { 102 - uint64_t remain = timeout - (now - start); 103 - struct timespec ts; 104 - ts.tv_sec = remain / 1000; 105 - ts.tv_nsec = (remain % 1000) * 1000 * 1000; 106 - syscall(__NR_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts); 107 - if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE)) 108 - return 1; 109 - now = current_time_ms(); 110 - if (now - start > timeout) 111 - return 0; 112 - } 113 - } 114 - 115 - #define SIZEOF_IO_URING_SQE 64 116 - #define SIZEOF_IO_URING_CQE 16 117 - #define SQ_HEAD_OFFSET 0 118 - #define SQ_TAIL_OFFSET 64 119 - #define SQ_RING_MASK_OFFSET 256 120 - #define SQ_RING_ENTRIES_OFFSET 264 121 - #define SQ_FLAGS_OFFSET 276 122 - #define SQ_DROPPED_OFFSET 272 123 - #define CQ_HEAD_OFFSET 128 124 - #define CQ_TAIL_OFFSET 192 125 - #define CQ_RING_MASK_OFFSET 260 126 - #define CQ_RING_ENTRIES_OFFSET 268 127 - #define CQ_RING_OVERFLOW_OFFSET 284 128 - #define CQ_FLAGS_OFFSET 280 129 - #define CQ_CQES_OFFSET 320 130 - 131 - struct io_sqring_offsets { 132 - uint32_t head; 133 - uint32_t tail; 134 - uint32_t ring_mask; 135 - uint32_t ring_entries; 136 - uint32_t flags; 137 - uint32_t dropped; 138 - uint32_t array; 139 - uint32_t resv1; 140 - uint64_t resv2; 141 - }; 142 - 143 - struct io_cqring_offsets { 144 - uint32_t head; 145 - uint32_t tail; 146 - uint32_t ring_mask; 147 - uint32_t ring_entries; 148 - uint32_t overflow; 149 - uint32_t cqes; 150 - uint64_t resv[2]; 151 - }; 152 - 153 - struct io_uring_params { 154 - uint32_t sq_entries; 155 - uint32_t cq_entries; 156 - uint32_t 
flags; 157 - uint32_t sq_thread_cpu; 158 - uint32_t sq_thread_idle; 159 - uint32_t features; 160 - uint32_t resv[4]; 161 - struct io_sqring_offsets sq_off; 162 - struct io_cqring_offsets cq_off; 163 - }; 164 - 165 - #define IORING_OFF_SQ_RING 0 166 - #define IORING_OFF_SQES 0x10000000ULL 167 - 168 - #define sys_io_uring_setup 425 169 - static long syz_io_uring_setup(volatile long a0, volatile long a1, 170 - volatile long a2, volatile long a3, 171 - volatile long a4, volatile long a5) 172 - { 173 - uint32_t entries = (uint32_t)a0; 174 - struct io_uring_params* setup_params = (struct io_uring_params*)a1; 175 - void* vma1 = (void*)a2; 176 - void* vma2 = (void*)a3; 177 - void** ring_ptr_out = (void**)a4; 178 - void** sqes_ptr_out = (void**)a5; 179 - uint32_t fd_io_uring = syscall(sys_io_uring_setup, entries, setup_params); 180 - uint32_t sq_ring_sz = 181 - setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t); 182 - uint32_t cq_ring_sz = setup_params->cq_off.cqes + 183 - setup_params->cq_entries * SIZEOF_IO_URING_CQE; 184 - uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? 
sq_ring_sz : cq_ring_sz; 185 - *ring_ptr_out = mmap(vma1, ring_sz, PROT_READ | PROT_WRITE, 186 - MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, 187 - IORING_OFF_SQ_RING); 188 - uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE; 189 - *sqes_ptr_out = 190 - mmap(vma2, sqes_sz, PROT_READ | PROT_WRITE, 191 - MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQES); 192 - return fd_io_uring; 193 - } 194 - 195 - static long syz_io_uring_submit(volatile long a0, volatile long a1, 196 - volatile long a2, volatile long a3) 197 - { 198 - char* ring_ptr = (char*)a0; 199 - char* sqes_ptr = (char*)a1; 200 - char* sqe = (char*)a2; 201 - uint32_t sqes_index = (uint32_t)a3; 202 - uint32_t sq_ring_entries = *(uint32_t*)(ring_ptr + SQ_RING_ENTRIES_OFFSET); 203 - uint32_t cq_ring_entries = *(uint32_t*)(ring_ptr + CQ_RING_ENTRIES_OFFSET); 204 - uint32_t sq_array_off = 205 - (CQ_CQES_OFFSET + cq_ring_entries * SIZEOF_IO_URING_CQE + 63) & ~63; 206 - if (sq_ring_entries) 207 - sqes_index %= sq_ring_entries; 208 - char* sqe_dest = sqes_ptr + sqes_index * SIZEOF_IO_URING_SQE; 209 - memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE); 210 - uint32_t sq_ring_mask = *(uint32_t*)(ring_ptr + SQ_RING_MASK_OFFSET); 211 - uint32_t* sq_tail_ptr = (uint32_t*)(ring_ptr + SQ_TAIL_OFFSET); 212 - uint32_t sq_tail = *sq_tail_ptr & sq_ring_mask; 213 - uint32_t sq_tail_next = *sq_tail_ptr + 1; 214 - uint32_t* sq_array = (uint32_t*)(ring_ptr + sq_array_off); 215 - *(sq_array + sq_tail) = sqes_index; 216 - __atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE); 217 - return 0; 218 - } 219 - 220 - static void kill_and_wait(int pid, int* status) 221 - { 222 - kill(-pid, SIGKILL); 223 - kill(pid, SIGKILL); 224 - for (int i = 0; i < 100; i++) { 225 - if (waitpid(-1, status, WNOHANG | __WALL) == pid) 226 - return; 227 - usleep(1000); 228 - } 229 - DIR* dir = opendir("/sys/fs/fuse/connections"); 230 - if (dir) { 231 - for (;;) { 232 - struct dirent* ent = readdir(dir); 233 - if 
(!ent) 234 - break; 235 - if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) 236 - continue; 237 - char abort[300]; 238 - snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", 239 - ent->d_name); 240 - int fd = open(abort, O_WRONLY); 241 - if (fd == -1) { 242 - continue; 243 - } 244 - if (write(fd, abort, 1) < 0) { 245 - } 246 - close(fd); 247 - } 248 - closedir(dir); 249 - } else { 250 - } 251 - while (waitpid(-1, status, __WALL) != pid) { 252 - } 253 - } 254 - 255 - static void setup_test(void) 256 - { 257 - prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 258 - setpgrp(); 259 - } 260 - 261 - struct thread_t { 262 - int created, call; 263 - event_t ready, done; 264 - }; 265 - 266 - static struct thread_t threads[16]; 267 - static void execute_call(int call); 268 - static int running; 269 - 270 - static void* thr(void* arg) 271 - { 272 - struct thread_t* th = (struct thread_t*)arg; 273 - for (;;) { 274 - event_wait(&th->ready); 275 - event_reset(&th->ready); 276 - execute_call(th->call); 277 - __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED); 278 - event_set(&th->done); 279 - } 280 - return 0; 281 - } 282 - 283 - static void execute_one(void) 284 - { 285 - int i, call, thread; 286 - for (call = 0; call < 4; call++) { 287 - for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0])); 288 - thread++) { 289 - struct thread_t* th = &threads[thread]; 290 - if (!th->created) { 291 - th->created = 1; 292 - event_init(&th->ready); 293 - event_init(&th->done); 294 - event_set(&th->done); 295 - thread_start(thr, th); 296 - } 297 - if (!event_isset(&th->done)) 298 - continue; 299 - event_reset(&th->done); 300 - th->call = call; 301 - __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED); 302 - event_set(&th->ready); 303 - event_timedwait(&th->done, 50); 304 - break; 305 - } 306 - } 307 - for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++) 308 - sleep_ms(1); 309 - } 310 - 311 - static void execute_one(void); 312 - 313 - 
#define WAIT_FLAGS __WALL 314 - 315 - static void loop(void) 316 - { 317 - int iter = 0; 318 - for (; iter < 5000; iter++) { 319 - int pid = fork(); 320 - if (pid < 0) 321 - exit(1); 322 - if (pid == 0) { 323 - setup_test(); 324 - execute_one(); 325 - exit(0); 326 - } 327 - int status = 0; 328 - uint64_t start = current_time_ms(); 329 - for (;;) { 330 - if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) 331 - break; 332 - sleep_ms(1); 333 - if (current_time_ms() - start < 5000) 334 - continue; 335 - kill_and_wait(pid, &status); 336 - break; 337 - } 338 - } 339 - } 340 - 341 - #ifndef __NR_io_uring_enter 342 - #define __NR_io_uring_enter 426 343 - #endif 344 - 345 - static uint64_t r[4] = {0xffffffffffffffff, 0xffffffffffffffff, 0x0, 0x0}; 346 - 347 - void execute_call(int call) 348 - { 349 - intptr_t res = 0; 350 - switch (call) { 351 - case 0: 352 - *(uint64_t*)0x200000c0 = 0; 353 - res = syscall(__NR_signalfd4, -1, 0x200000c0ul, 8ul, 0ul); 354 - if (res != -1) 355 - r[0] = res; 356 - break; 357 - case 1: 358 - *(uint32_t*)0x20000a84 = 0; 359 - *(uint32_t*)0x20000a88 = 0; 360 - *(uint32_t*)0x20000a8c = 0; 361 - *(uint32_t*)0x20000a90 = 0; 362 - *(uint32_t*)0x20000a98 = -1; 363 - memset((void*)0x20000a9c, 0, 12); 364 - res = -1; 365 - res = syz_io_uring_setup(0x87, 0x20000a80, 0x206d6000, 0x206d7000, 366 - 0x20000000, 0x20000040); 367 - if (res != -1) { 368 - r[1] = res; 369 - r[2] = *(uint64_t*)0x20000000; 370 - r[3] = *(uint64_t*)0x20000040; 371 - } 372 - break; 373 - case 2: 374 - *(uint8_t*)0x20002240 = 6; 375 - *(uint8_t*)0x20002241 = 0; 376 - *(uint16_t*)0x20002242 = 0; 377 - *(uint32_t*)0x20002244 = r[0]; 378 - *(uint64_t*)0x20002248 = 0; 379 - *(uint64_t*)0x20002250 = 0; 380 - *(uint32_t*)0x20002258 = 0; 381 - *(uint16_t*)0x2000225c = 0; 382 - *(uint16_t*)0x2000225e = 0; 383 - *(uint64_t*)0x20002260 = 0; 384 - *(uint16_t*)0x20002268 = 0; 385 - *(uint16_t*)0x2000226a = 0; 386 - memset((void*)0x2000226c, 0, 20); 387 - syz_io_uring_submit(r[2], r[3], 
0x20002240, 0); 388 - break; 389 - case 3: 390 - syscall(__NR_io_uring_enter, r[1], 0x1523a, 0, 0ul, 0ul, 0xaul); 391 - break; 392 - } 393 - } 394 - 395 - int main(int argc, char *argv[]) 396 - { 397 - void *ret; 398 - 399 - #if !defined(__i386) && !defined(__x86_64__) 400 - return 0; 401 - #endif 402 - 403 - if (argc > 1) 404 - return 0; 405 - 406 - ret = mmap((void *)0x1ffff000ul, 0x1000ul, 0ul, MAP_ANON|MAP_PRIVATE, -1, 0ul); 407 - if (ret == MAP_FAILED) 408 - return 0; 409 - ret = mmap((void *)0x20000000ul, 0x1000000ul, 7ul, MAP_ANON|MAP_PRIVATE, -1, 0ul); 410 - if (ret == MAP_FAILED) 411 - return 0; 412 - ret = mmap((void *)0x21000000ul, 0x1000ul, 0ul, MAP_ANON|MAP_PRIVATE, -1, 0ul); 413 - if (ret == MAP_FAILED) 414 - return 0; 415 - loop(); 416 - return 0; 417 - } 418 - 419 - #else /* __NR_futex */ 420 - 421 - int main(int argc, char *argv[]) 422 - { 423 - return 0; 424 - } 425 - 426 - #endif /* __NR_futex */

+153

vendor/liburing/test/read-mshot-empty.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: test that multishot read correctly keeps reading until all 4 + * data has been emptied. the original implementation failed 5 + * to do so, if the available buffer size was less than what 6 + * was available, hence requiring multiple reads to empty the 7 + * file buffer. 8 + */ 9 + #include <stdio.h> 10 + #include <unistd.h> 11 + #include <stdlib.h> 12 + #include <string.h> 13 + #include <assert.h> 14 + #include <pthread.h> 15 + #include <sys/time.h> 16 + 17 + #include "liburing.h" 18 + #include "helpers.h" 19 + 20 + #define BGID 17 21 + #define NR_BUFS 4 22 + #define BR_MASK (NR_BUFS - 1) 23 + #define BUF_SIZE 32 24 + 25 + static int do_write(int fd, void *buf, int buf_size) 26 + { 27 + int ret; 28 + 29 + ret = write(fd, buf, buf_size); 30 + if (ret < 0) { 31 + perror("write"); 32 + return 0; 33 + } else if (ret != buf_size) { 34 + fprintf(stderr, "bad write size %d\n", ret); 35 + return 0; 36 + } 37 + 38 + return 1; 39 + } 40 + 41 + static void *thread_fn(void *data) 42 + { 43 + char w1[BUF_SIZE], w2[BUF_SIZE]; 44 + int *fds = data; 45 + 46 + memset(w1, 0x11, BUF_SIZE); 47 + memset(w2, 0x22, BUF_SIZE); 48 + 49 + if (!do_write(fds[1], w1, BUF_SIZE)) 50 + return NULL; 51 + if (!do_write(fds[1], w2, BUF_SIZE)) 52 + return NULL; 53 + 54 + usleep(100000); 55 + 56 + if (!do_write(fds[1], w1, BUF_SIZE)) 57 + return NULL; 58 + if (!do_write(fds[1], w2, BUF_SIZE)) 59 + return NULL; 60 + 61 + return NULL; 62 + } 63 + 64 + int main(int argc, char *argv[]) 65 + { 66 + struct io_uring_buf_ring *br; 67 + struct io_uring_sqe *sqe; 68 + struct io_uring_cqe *cqe; 69 + struct io_uring ring; 70 + pthread_t thread; 71 + int i, ret, fds[2]; 72 + void *buf, *tret; 73 + 74 + if (argc > 1) 75 + return T_EXIT_SKIP; 76 + 77 + if (pipe(fds) < 0) { 78 + perror("pipe"); 79 + return T_EXIT_FAIL; 80 + } 81 + 82 + ret = io_uring_queue_init(8, &ring, 0); 83 + if (ret) { 84 + fprintf(stderr, "queue_init: %d\n", ret); 85 + return 
T_EXIT_FAIL; 86 + } 87 + 88 + br = io_uring_setup_buf_ring(&ring, NR_BUFS, BGID, 0, &ret); 89 + if (!br) { 90 + if (ret == -EINVAL) 91 + return T_EXIT_SKIP; 92 + fprintf(stderr, "failed buffer ring %d\n", ret); 93 + return T_EXIT_FAIL; 94 + } 95 + 96 + buf = malloc(NR_BUFS * BUF_SIZE); 97 + for (i = 0; i < NR_BUFS; i++) { 98 + void *this_buf = buf + i * BUF_SIZE; 99 + 100 + io_uring_buf_ring_add(br, this_buf, BUF_SIZE, i, BR_MASK, i); 101 + } 102 + io_uring_buf_ring_advance(br, NR_BUFS); 103 + 104 + sqe = io_uring_get_sqe(&ring); 105 + io_uring_prep_read_multishot(sqe, fds[0], 0, 0, BGID); 106 + 107 + ret = io_uring_submit(&ring); 108 + if (ret != 1) { 109 + fprintf(stderr, "bad submit %d\n", ret); 110 + return T_EXIT_FAIL; 111 + } 112 + 113 + /* 114 + * read multishot not available would be ready as a cqe when 115 + * submission returns, check and skip if not. 116 + */ 117 + ret = io_uring_peek_cqe(&ring, &cqe); 118 + if (!ret) { 119 + if (cqe->res == -EINVAL || cqe->res == -EBADF) 120 + return T_EXIT_SKIP; 121 + } 122 + 123 + pthread_create(&thread, NULL, thread_fn, fds); 124 + 125 + for (i = 0; i < 4; i++) { 126 + int buf_index; 127 + 128 + ret = io_uring_wait_cqe(&ring, &cqe); 129 + if (ret) { 130 + fprintf(stderr, "wait %d\n", ret); 131 + break; 132 + } 133 + 134 + if (cqe->res != BUF_SIZE) { 135 + fprintf(stderr, "size %d\n", cqe->res); 136 + return T_EXIT_FAIL; 137 + } 138 + if (!(cqe->flags & IORING_CQE_F_BUFFER)) { 139 + fprintf(stderr, "buffer not set\n"); 140 + return T_EXIT_FAIL; 141 + } 142 + if (!(cqe->flags & IORING_CQE_F_MORE)) { 143 + fprintf(stderr, "more not set\n"); 144 + return T_EXIT_FAIL; 145 + } 146 + buf_index = cqe->flags >> 16; 147 + assert(buf_index >= 0 && buf_index <= NR_BUFS); 148 + io_uring_cqe_seen(&ring, cqe); 149 + } 150 + 151 + pthread_join(thread, &tret); 152 + return T_EXIT_PASS; 153 + }

+404

vendor/liburing/test/read-mshot.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: test multishot read (IORING_OP_READ_MULTISHOT) on pipes, 4 + * using ring provided buffers 5 + * 6 + */ 7 + #include <errno.h> 8 + #include <stdio.h> 9 + #include <unistd.h> 10 + #include <stdlib.h> 11 + #include <string.h> 12 + #include <fcntl.h> 13 + 14 + #include "liburing.h" 15 + #include "helpers.h" 16 + 17 + #define BUF_SIZE 32 18 + #define BUF_SIZE_FIRST 17 19 + #define NR_BUFS 64 20 + #define BUF_BGID 1 21 + 22 + #define BR_MASK (NR_BUFS - 1) 23 + 24 + #define NR_OVERFLOW (NR_BUFS / 4) 25 + 26 + static int no_buf_ring, no_read_mshot; 27 + 28 + static int test_clamp(void) 29 + { 30 + struct io_uring_buf_ring *br; 31 + struct io_uring_params p = { }; 32 + struct io_uring_sqe *sqe; 33 + struct io_uring_cqe *cqe; 34 + struct io_uring ring; 35 + int ret, fds[2], i; 36 + char tmp[32]; 37 + char *buf; 38 + void *ptr; 39 + 40 + ret = io_uring_queue_init_params(4, &ring, &p); 41 + if (ret) { 42 + fprintf(stderr, "ring setup failed: %d\n", ret); 43 + return 1; 44 + } 45 + 46 + if (pipe(fds) < 0) { 47 + perror("pipe"); 48 + return 1; 49 + } 50 + 51 + if (posix_memalign((void **) &buf, 4096, NR_BUFS * BUF_SIZE)) 52 + return 1; 53 + 54 + br = io_uring_setup_buf_ring(&ring, NR_BUFS, BUF_BGID, 0, &ret); 55 + if (!br) { 56 + if (ret == -EINVAL) { 57 + no_buf_ring = 1; 58 + return 0; 59 + } 60 + fprintf(stderr, "Buffer ring register failed %d\n", ret); 61 + return 1; 62 + } 63 + 64 + ptr = buf; 65 + io_uring_buf_ring_add(br, buf, 16, 1, BR_MASK, 0); 66 + buf += 16; 67 + io_uring_buf_ring_add(br, buf, 32, 2, BR_MASK, 1); 68 + buf += 32; 69 + io_uring_buf_ring_add(br, buf, 32, 3, BR_MASK, 2); 70 + buf += 32; 71 + io_uring_buf_ring_add(br, buf, 32, 4, BR_MASK, 3); 72 + buf += 32; 73 + io_uring_buf_ring_advance(br, 4); 74 + 75 + memset(tmp, 0xaa, sizeof(tmp)); 76 + 77 + sqe = io_uring_get_sqe(&ring); 78 + io_uring_prep_read_multishot(sqe, fds[0], 0, 0, BUF_BGID); 79 + 80 + ret = io_uring_submit(&ring); 81 + if 
(ret != 1) { 82 + fprintf(stderr, "submit: %d\n", ret); 83 + return 1; 84 + } 85 + 86 + /* prevent pipe buffer merging */ 87 + usleep(1000); 88 + ret = write(fds[1], tmp, 16); 89 + 90 + usleep(1000); 91 + ret = write(fds[1], tmp, sizeof(tmp)); 92 + 93 + /* prevent pipe buffer merging */ 94 + usleep(1000); 95 + ret = write(fds[1], tmp, 16); 96 + 97 + usleep(1000); 98 + ret = write(fds[1], tmp, sizeof(tmp)); 99 + 100 + /* 101 + * We should see a 16 byte completion, then a 32 byte, then a 16 byte, 102 + * and finally a 32 byte again. 103 + */ 104 + for (i = 0; i < 4; i++) { 105 + ret = io_uring_wait_cqe(&ring, &cqe); 106 + if (ret) { 107 + fprintf(stderr, "wait cqe failed %d\n", ret); 108 + return 1; 109 + } 110 + if (cqe->res < 0) { 111 + fprintf(stderr, "cqe res: %d\n", cqe->res); 112 + return 1; 113 + } 114 + if (!(cqe->flags & IORING_CQE_F_MORE)) { 115 + fprintf(stderr, "no more cqes\n"); 116 + return 1; 117 + } 118 + if (i == 0 || i == 2) { 119 + if (cqe->res != 16) { 120 + fprintf(stderr, "%d cqe got %d\n", i, cqe->res); 121 + return 1; 122 + } 123 + } else if (i == 1 || i == 3) { 124 + if (cqe->res != 32) { 125 + fprintf(stderr, "%d cqe got %d\n", i, cqe->res); 126 + return 1; 127 + } 128 + } 129 + io_uring_cqe_seen(&ring, cqe); 130 + } 131 + 132 + io_uring_queue_exit(&ring); 133 + free(ptr); 134 + return 0; 135 + } 136 + 137 + static int test(int first_good, int async, int overflow) 138 + { 139 + struct io_uring_buf_ring *br; 140 + struct io_uring_params p = { }; 141 + struct io_uring_sqe *sqe; 142 + struct io_uring_cqe *cqe; 143 + struct io_uring ring; 144 + int ret, fds[2], i; 145 + char tmp[32]; 146 + void *ptr[NR_BUFS]; 147 + 148 + p.flags = IORING_SETUP_CQSIZE; 149 + if (!overflow) 150 + p.cq_entries = NR_BUFS + 1; 151 + else 152 + p.cq_entries = NR_OVERFLOW; 153 + ret = io_uring_queue_init_params(1, &ring, &p); 154 + if (ret) { 155 + fprintf(stderr, "ring setup failed: %d\n", ret); 156 + return 1; 157 + } 158 + 159 + if (pipe(fds) < 0) { 160 + 
perror("pipe"); 161 + return 1; 162 + } 163 + 164 + br = io_uring_setup_buf_ring(&ring, NR_BUFS, BUF_BGID, 0, &ret); 165 + if (!br) { 166 + if (ret == -EINVAL) { 167 + no_buf_ring = 1; 168 + return 0; 169 + } 170 + fprintf(stderr, "Buffer ring register failed %d\n", ret); 171 + return 1; 172 + } 173 + 174 + for (i = 0; i < NR_BUFS; i++) { 175 + unsigned size = i <= 1 ? BUF_SIZE_FIRST : BUF_SIZE; 176 + ptr[i] = malloc(size); 177 + if (!ptr[i]) 178 + return 1; 179 + io_uring_buf_ring_add(br, ptr[i], size, i + 1, BR_MASK, i); 180 + } 181 + io_uring_buf_ring_advance(br, NR_BUFS); 182 + 183 + if (first_good) { 184 + sprintf(tmp, "this is buffer %d\n", 0); 185 + ret = write(fds[1], tmp, strlen(tmp)); 186 + } 187 + 188 + sqe = io_uring_get_sqe(&ring); 189 + /* len == 0 means just use the defined provided buffer length */ 190 + io_uring_prep_read_multishot(sqe, fds[0], 0, 0, BUF_BGID); 191 + if (async) 192 + sqe->flags |= IOSQE_ASYNC; 193 + 194 + ret = io_uring_submit(&ring); 195 + if (ret != 1) { 196 + fprintf(stderr, "submit: %d\n", ret); 197 + return 1; 198 + } 199 + 200 + /* write NR_BUFS + 1, or if first_good is set, NR_BUFS */ 201 + for (i = 0; i < NR_BUFS + !first_good; i++) { 202 + /* prevent pipe buffer merging */ 203 + usleep(1000); 204 + sprintf(tmp, "this is buffer %d\n", i + 1); 205 + ret = write(fds[1], tmp, strlen(tmp)); 206 + if (ret != strlen(tmp)) { 207 + fprintf(stderr, "write ret %d\n", ret); 208 + return 1; 209 + } 210 + } 211 + 212 + for (i = 0; i < NR_BUFS + 1; i++) { 213 + ret = io_uring_wait_cqe(&ring, &cqe); 214 + if (ret) { 215 + fprintf(stderr, "wait cqe failed %d\n", ret); 216 + return 1; 217 + } 218 + if (cqe->res < 0) { 219 + /* expected failure as we try to read one too many */ 220 + if (cqe->res == -ENOBUFS && i == NR_BUFS) 221 + break; 222 + if (!i && cqe->res == -EINVAL) { 223 + no_read_mshot = 1; 224 + break; 225 + } 226 + fprintf(stderr, "%d: cqe res %d\n", i, cqe->res); 227 + return 1; 228 + } else if (i > 9 && cqe->res <= 17) { 229 + 
fprintf(stderr, "truncated message %d %d\n", i, cqe->res); 230 + return 1; 231 + } 232 + 233 + if (!(cqe->flags & IORING_CQE_F_BUFFER)) { 234 + fprintf(stderr, "no buffer selected\n"); 235 + return 1; 236 + } 237 + if (!(cqe->flags & IORING_CQE_F_MORE)) { 238 + /* we expect this on overflow */ 239 + if (overflow && i >= NR_OVERFLOW) 240 + break; 241 + fprintf(stderr, "no more cqes\n"); 242 + return 1; 243 + } 244 + /* should've overflown! */ 245 + if (overflow && i > NR_OVERFLOW) { 246 + fprintf(stderr, "Expected overflow!\n"); 247 + return 1; 248 + } 249 + io_uring_cqe_seen(&ring, cqe); 250 + } 251 + 252 + io_uring_queue_exit(&ring); 253 + for (i = 0; i < NR_BUFS; i++) 254 + free(ptr[i]); 255 + return 0; 256 + } 257 + 258 + static int test_invalid(int async) 259 + { 260 + struct io_uring_buf_ring *br; 261 + struct io_uring_params p = { }; 262 + struct io_uring_sqe *sqe; 263 + struct io_uring_cqe *cqe; 264 + struct io_uring ring; 265 + char fname[32] = ".mshot.%d.XXXXXX"; 266 + int ret, fd; 267 + char *buf; 268 + 269 + p.flags = IORING_SETUP_CQSIZE; 270 + p.cq_entries = NR_BUFS; 271 + ret = io_uring_queue_init_params(1, &ring, &p); 272 + if (ret) { 273 + fprintf(stderr, "ring setup failed: %d\n", ret); 274 + return 1; 275 + } 276 + 277 + fd = mkstemp(fname); 278 + if (fd < 0) { 279 + perror("mkstemp"); 280 + return 1; 281 + } 282 + unlink(fname); 283 + 284 + if (posix_memalign((void **) &buf, 4096, BUF_SIZE)) 285 + return 1; 286 + 287 + br = io_uring_setup_buf_ring(&ring, 1, BUF_BGID, 0, &ret); 288 + if (!br) { 289 + fprintf(stderr, "Buffer ring register failed %d\n", ret); 290 + return 1; 291 + } 292 + 293 + io_uring_buf_ring_add(br, buf, BUF_SIZE, 1, BR_MASK, 0); 294 + io_uring_buf_ring_advance(br, 1); 295 + 296 + sqe = io_uring_get_sqe(&ring); 297 + /* len == 0 means just use the defined provided buffer length */ 298 + io_uring_prep_read_multishot(sqe, fd, 0, 0, BUF_BGID); 299 + if (async) 300 + sqe->flags |= IOSQE_ASYNC; 301 + 302 + ret = 
io_uring_submit(&ring); 303 + if (ret != 1) { 304 + fprintf(stderr, "submit: %d\n", ret); 305 + return 1; 306 + } 307 + 308 + ret = io_uring_wait_cqe(&ring, &cqe); 309 + if (ret) { 310 + fprintf(stderr, "wait cqe failed %d\n", ret); 311 + return 1; 312 + } 313 + if (cqe->flags & IORING_CQE_F_MORE) { 314 + fprintf(stderr, "MORE flag set unexpected %d\n", cqe->flags); 315 + return 1; 316 + } 317 + if (cqe->res != -EBADFD) { 318 + fprintf(stderr, "Got cqe res %d, wanted -EBADFD\n", cqe->res); 319 + return 1; 320 + } 321 + 322 + io_uring_cqe_seen(&ring, cqe); 323 + io_uring_queue_exit(&ring); 324 + free(buf); 325 + return 0; 326 + } 327 + 328 + int main(int argc, char *argv[]) 329 + { 330 + int ret; 331 + 332 + if (argc > 1) 333 + return T_EXIT_SKIP; 334 + 335 + ret = test(0, 0, 0); 336 + if (ret) { 337 + fprintf(stderr, "test 0 0 0 failed\n"); 338 + return T_EXIT_FAIL; 339 + } 340 + if (no_buf_ring || no_read_mshot) 341 + return T_EXIT_SKIP; 342 + 343 + ret = test(0, 1, 0); 344 + if (ret) { 345 + fprintf(stderr, "test 0 1 0, failed\n"); 346 + return T_EXIT_FAIL; 347 + } 348 + 349 + ret = test(1, 0, 0); 350 + if (ret) { 351 + fprintf(stderr, "test 1 0 0 failed\n"); 352 + return T_EXIT_FAIL; 353 + } 354 + 355 + ret = test(0, 0, 1); 356 + if (ret) { 357 + fprintf(stderr, "test 0 0 1 failed\n"); 358 + return T_EXIT_FAIL; 359 + } 360 + 361 + ret = test(0, 1, 1); 362 + if (ret) { 363 + fprintf(stderr, "test 0 1 1 failed\n"); 364 + return T_EXIT_FAIL; 365 + } 366 + 367 + ret = test(1, 0, 1); 368 + if (ret) { 369 + fprintf(stderr, "test 1 0 1, failed\n"); 370 + return T_EXIT_FAIL; 371 + } 372 + 373 + ret = test(1, 0, 1); 374 + if (ret) { 375 + fprintf(stderr, "test 1 0 1 failed\n"); 376 + return T_EXIT_FAIL; 377 + } 378 + 379 + ret = test(1, 1, 1); 380 + if (ret) { 381 + fprintf(stderr, "test 1 1 1 failed\n"); 382 + return T_EXIT_FAIL; 383 + } 384 + 385 + ret = test_invalid(0); 386 + if (ret) { 387 + fprintf(stderr, "test_invalid 0 failed\n"); 388 + return T_EXIT_FAIL; 389 + 
} 390 + 391 + ret = test_invalid(1); 392 + if (ret) { 393 + fprintf(stderr, "test_invalid 1 failed\n"); 394 + return T_EXIT_FAIL; 395 + } 396 + 397 + ret = test_clamp(); 398 + if (ret) { 399 + fprintf(stderr, "test_clamp failed\n"); 400 + return T_EXIT_FAIL; 401 + } 402 + 403 + return T_EXIT_PASS; 404 + }

+7 -3

vendor/liburing/test/recv-multishot.c

··· 57 57 int const N = 8; 58 58 int const N_BUFFS = N * 64; 59 59 int const N_CQE_OVERFLOW = 4; 60 - int const min_cqes = 2; 60 + int const min_cqes = args->early_error ? 2 : 8; 61 61 int const NAME_LEN = sizeof(struct sockaddr_storage); 62 62 int const CONTROL_LEN = CMSG_ALIGN(sizeof(struct sockaddr_storage)) 63 63 + sizeof(struct cmsghdr); ··· 237 237 usleep(1000); 238 238 239 239 if ((args->stream && !early_error) || recv_cqes < min_cqes) { 240 - ret = io_uring_wait_cqes(&ring, &cqe, 1, &timeout, NULL); 240 + unsigned int to_wait = 1; 241 + 242 + if (recv_cqes < min_cqes) 243 + to_wait = min_cqes - recv_cqes; 244 + ret = io_uring_wait_cqes(&ring, &cqe, to_wait, &timeout, NULL); 241 245 if (ret && ret != -ETIME) { 242 246 fprintf(stderr, "wait final failed: %d\n", ret); 243 247 ret = -1; ··· 271 275 */ 272 276 bool const early_last = args->early_error == ERROR_EARLY_OVERFLOW && 273 277 !args->wait_each && 274 - i == N_CQE_OVERFLOW && 278 + i >= N_CQE_OVERFLOW && 275 279 !(cqe->flags & IORING_CQE_F_MORE); 276 280 277 281 bool const should_be_last =

+691

vendor/liburing/test/recvsend_bundle.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Simple test case showing using send and recv bundles 4 + */ 5 + #include <errno.h> 6 + #include <stdio.h> 7 + #include <stdlib.h> 8 + #include <string.h> 9 + #include <unistd.h> 10 + #include <arpa/inet.h> 11 + #include <sys/types.h> 12 + #include <sys/socket.h> 13 + #include <pthread.h> 14 + 15 + #define MSG_SIZE 128 16 + #define NR_MIN_MSGS 4 17 + #define NR_MAX_MSGS 32 18 + #define SEQ_SIZE (MSG_SIZE / sizeof(unsigned long)) 19 + 20 + static int nr_msgs; 21 + static int use_tcp; 22 + 23 + #define RECV_BIDS 8192 24 + #define RECV_BID_MASK (RECV_BIDS - 1) 25 + 26 + #include "liburing.h" 27 + #include "helpers.h" 28 + 29 + #define PORT 10202 30 + #define HOST "127.0.0.1" 31 + 32 + static int use_port = PORT; 33 + 34 + #define SEND_BGID 7 35 + #define RECV_BGID 8 36 + 37 + static int no_send_mshot; 38 + 39 + struct recv_data { 40 + pthread_barrier_t connect; 41 + pthread_barrier_t startup; 42 + pthread_barrier_t barrier; 43 + pthread_barrier_t finish; 44 + unsigned long seq; 45 + int recv_bytes; 46 + int accept_fd; 47 + int abort; 48 + unsigned int max_sends; 49 + int to_eagain; 50 + void *recv_buf; 51 + 52 + int send_bundle; 53 + int recv_bundle; 54 + }; 55 + 56 + static int arm_recv(struct io_uring *ring, struct recv_data *rd) 57 + { 58 + struct io_uring_sqe *sqe; 59 + int ret; 60 + 61 + sqe = io_uring_get_sqe(ring); 62 + io_uring_prep_recv_multishot(sqe, rd->accept_fd, NULL, 0, 0); 63 + if (rd->recv_bundle && use_tcp) 64 + sqe->ioprio |= IORING_RECVSEND_BUNDLE; 65 + sqe->buf_group = RECV_BGID; 66 + sqe->flags |= IOSQE_BUFFER_SELECT; 67 + sqe->user_data = 2; 68 + 69 + ret = io_uring_submit(ring); 70 + if (ret != 1) { 71 + fprintf(stderr, "submit failed: %d\n", ret); 72 + return 1; 73 + } 74 + 75 + return 0; 76 + } 77 + 78 + static int recv_prep(struct io_uring *ring, struct recv_data *rd, int *sock) 79 + { 80 + struct sockaddr_in saddr; 81 + int sockfd, ret, val, use_fd; 82 + socklen_t socklen; 83 + 84 + 
memset(&saddr, 0, sizeof(saddr)); 85 + saddr.sin_family = AF_INET; 86 + saddr.sin_addr.s_addr = htonl(INADDR_ANY); 87 + saddr.sin_port = htons(use_port); 88 + 89 + if (use_tcp) 90 + sockfd = socket(AF_INET, SOCK_STREAM, 0); 91 + else 92 + sockfd = socket(AF_INET, SOCK_DGRAM, 0); 93 + if (sockfd < 0) { 94 + perror("socket"); 95 + return 1; 96 + } 97 + 98 + val = 1; 99 + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); 100 + 101 + ret = bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)); 102 + if (ret < 0) { 103 + perror("bind"); 104 + goto err; 105 + } 106 + 107 + if (use_tcp) { 108 + ret = listen(sockfd, 1); 109 + if (ret < 0) { 110 + perror("listen"); 111 + goto err; 112 + } 113 + 114 + pthread_barrier_wait(&rd->connect); 115 + 116 + socklen = sizeof(saddr); 117 + use_fd = accept(sockfd, (struct sockaddr *)&saddr, &socklen); 118 + if (use_fd < 0) { 119 + perror("accept"); 120 + goto err; 121 + } 122 + } else { 123 + use_fd = sockfd; 124 + pthread_barrier_wait(&rd->connect); 125 + } 126 + 127 + rd->accept_fd = use_fd; 128 + pthread_barrier_wait(&rd->startup); 129 + pthread_barrier_wait(&rd->barrier); 130 + 131 + if (arm_recv(ring, rd)) 132 + goto err; 133 + 134 + *sock = sockfd; 135 + return 0; 136 + err: 137 + close(sockfd); 138 + return 1; 139 + } 140 + 141 + static int verify_seq(struct recv_data *rd, void *verify_ptr, int verify_sz, 142 + int start_bid) 143 + { 144 + unsigned long *seqp; 145 + int seq_size = verify_sz / sizeof(unsigned long); 146 + int i; 147 + 148 + seqp = verify_ptr; 149 + for (i = 0; i < seq_size; i++) { 150 + if (rd->seq != *seqp) { 151 + fprintf(stderr, "bid=%d, got seq %lu, wanted %lu, offset %d\n", start_bid, *seqp, rd->seq, i); 152 + return 0; 153 + } 154 + seqp++; 155 + rd->seq++; 156 + } 157 + 158 + return 1; 159 + } 160 + 161 + static int recv_get_cqe(struct io_uring *ring, struct recv_data *rd, 162 + struct io_uring_cqe **cqe) 163 + { 164 + struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 100000000LL }; 
165 + int ret; 166 + 167 + do { 168 + ret = io_uring_wait_cqe_timeout(ring, cqe, &ts); 169 + if (!ret) 170 + return 0; 171 + if (ret == -ETIME) { 172 + if (rd->abort) 173 + break; 174 + continue; 175 + } 176 + fprintf(stderr, "wait recv: %d\n", ret); 177 + break; 178 + } while (1); 179 + 180 + return 1; 181 + } 182 + 183 + static int do_recv(struct io_uring *ring, struct recv_data *rd) 184 + { 185 + struct io_uring_cqe *cqe; 186 + int bid, next_bid = 0; 187 + void *verify_ptr; 188 + int verify_sz = 0; 189 + int verify_bid = 0; 190 + 191 + verify_ptr = malloc(rd->recv_bytes); 192 + 193 + do { 194 + if (recv_get_cqe(ring, rd, &cqe)) 195 + break; 196 + if (cqe->res == -EINVAL) { 197 + fprintf(stdout, "recv not supported, skipping\n"); 198 + return 0; 199 + } 200 + if (cqe->res < 0) { 201 + fprintf(stderr, "failed recv cqe: %d\n", cqe->res); 202 + goto err; 203 + } 204 + if (!(cqe->flags & IORING_CQE_F_BUFFER)) { 205 + fprintf(stderr, "no buffer set in recv\n"); 206 + goto err; 207 + } 208 + bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT; 209 + if (bid != next_bid) { 210 + fprintf(stderr, "got bid %d, wanted %d\n", bid, next_bid); 211 + goto err; 212 + } 213 + if (!rd->recv_bundle && cqe->res != MSG_SIZE) { 214 + fprintf(stderr, "recv got wrong length: %d\n", cqe->res); 215 + goto err; 216 + } 217 + if (!(verify_sz % MSG_SIZE)) { 218 + if (!verify_seq(rd, verify_ptr, verify_sz, verify_bid)) 219 + goto err; 220 + verify_bid += verify_sz / MSG_SIZE; 221 + verify_bid &= RECV_BID_MASK; 222 + verify_sz = 0; 223 + } else { 224 + memcpy(verify_ptr + verify_sz, rd->recv_buf + (bid * MSG_SIZE), cqe->res); 225 + verify_sz += cqe->res; 226 + } 227 + next_bid = bid + ((cqe->res + MSG_SIZE - 1) / MSG_SIZE); 228 + next_bid &= RECV_BID_MASK; 229 + rd->recv_bytes -= cqe->res; 230 + io_uring_cqe_seen(ring, cqe); 231 + if (!(cqe->flags & IORING_CQE_F_MORE) && rd->recv_bytes) { 232 + if (arm_recv(ring, rd)) 233 + goto err; 234 + } 235 + } while (rd->recv_bytes); 236 + 237 + if (verify_sz && 
!(verify_sz % MSG_SIZE) && 238 + !verify_seq(rd, verify_ptr, verify_sz, verify_bid)) 239 + goto err; 240 + 241 + pthread_barrier_wait(&rd->finish); 242 + return 0; 243 + err: 244 + pthread_barrier_wait(&rd->finish); 245 + return 1; 246 + } 247 + 248 + static void *recv_fn(void *data) 249 + { 250 + struct recv_data *rd = data; 251 + struct io_uring_params p = { }; 252 + struct io_uring ring; 253 + struct io_uring_buf_ring *br; 254 + void *buf, *ptr; 255 + int ret, sock, i; 256 + 257 + p.cq_entries = 4096; 258 + p.flags = IORING_SETUP_CQSIZE; 259 + ret = t_create_ring_params(16, &ring, &p); 260 + if (ret == T_SETUP_SKIP) { 261 + ret = 0; 262 + goto err; 263 + } else if (ret < 0) { 264 + goto err; 265 + } 266 + 267 + if (posix_memalign(&buf, 4096, MSG_SIZE * RECV_BIDS)) 268 + goto err; 269 + 270 + br = io_uring_setup_buf_ring(&ring, RECV_BIDS, RECV_BGID, 0, &ret); 271 + if (!br) { 272 + fprintf(stderr, "failed setting up recv ring %d\n", ret); 273 + goto err; 274 + } 275 + 276 + ptr = buf; 277 + for (i = 0; i < RECV_BIDS; i++) { 278 + io_uring_buf_ring_add(br, ptr, MSG_SIZE, i, RECV_BID_MASK, i); 279 + ptr += MSG_SIZE; 280 + } 281 + io_uring_buf_ring_advance(br, RECV_BIDS); 282 + rd->recv_buf = buf; 283 + 284 + ret = recv_prep(&ring, rd, &sock); 285 + if (ret) { 286 + fprintf(stderr, "recv_prep failed: %d\n", ret); 287 + goto err; 288 + } 289 + 290 + ret = do_recv(&ring, rd); 291 + 292 + close(sock); 293 + close(rd->accept_fd); 294 + io_uring_queue_exit(&ring); 295 + err: 296 + return (void *)(intptr_t)ret; 297 + } 298 + 299 + static int __do_send_bundle(struct recv_data *rd, struct io_uring *ring, int sockfd) 300 + { 301 + struct io_uring_cqe *cqe; 302 + struct io_uring_sqe *sqe; 303 + int bytes_needed = MSG_SIZE * nr_msgs; 304 + int i, ret; 305 + 306 + sqe = io_uring_get_sqe(ring); 307 + io_uring_prep_send_bundle(sqe, sockfd, 0, 0); 308 + sqe->flags |= IOSQE_BUFFER_SELECT; 309 + sqe->buf_group = SEND_BGID; 310 + sqe->user_data = 1; 311 + 312 + ret = 
io_uring_submit(ring); 313 + if (ret != 1) 314 + return 1; 315 + 316 + pthread_barrier_wait(&rd->barrier); 317 + 318 + for (i = 0; i < nr_msgs; i++) { 319 + ret = io_uring_wait_cqe(ring, &cqe); 320 + if (ret) { 321 + fprintf(stderr, "wait send: %d\n", ret); 322 + return 1; 323 + } 324 + if (!i && cqe->res == -EINVAL) { 325 + rd->abort = 1; 326 + no_send_mshot = 1; 327 + break; 328 + } 329 + if (cqe->res < 0) { 330 + fprintf(stderr, "bad send cqe res: %d\n", cqe->res); 331 + return 1; 332 + } 333 + bytes_needed -= cqe->res; 334 + if (!bytes_needed) { 335 + io_uring_cqe_seen(ring, cqe); 336 + break; 337 + } 338 + if (!(cqe->flags & IORING_CQE_F_MORE)) { 339 + fprintf(stderr, "expected more, but MORE not set\n"); 340 + return 1; 341 + } 342 + io_uring_cqe_seen(ring, cqe); 343 + } 344 + 345 + return 0; 346 + } 347 + 348 + static int __do_send(struct recv_data *rd, struct io_uring *ring, int sockfd) 349 + { 350 + struct io_uring_cqe *cqe; 351 + struct io_uring_sqe *sqe; 352 + int bytes_needed = MSG_SIZE * nr_msgs; 353 + int i, ret; 354 + 355 + for (i = 0; i < nr_msgs; i++) { 356 + sqe = io_uring_get_sqe(ring); 357 + io_uring_prep_send(sqe, sockfd, NULL, 0, 0); 358 + sqe->user_data = 10 + i; 359 + sqe->flags |= IOSQE_BUFFER_SELECT; 360 + sqe->buf_group = SEND_BGID; 361 + 362 + ret = io_uring_submit(ring); 363 + if (ret != 1) 364 + return 1; 365 + 366 + if (!i) 367 + pthread_barrier_wait(&rd->barrier); 368 + ret = io_uring_wait_cqe(ring, &cqe); 369 + if (ret) { 370 + fprintf(stderr, "send wait cqe %d\n", ret); 371 + return 1; 372 + } 373 + 374 + if (!i && cqe->res == -EINVAL) { 375 + rd->abort = 1; 376 + no_send_mshot = 1; 377 + break; 378 + } 379 + if (cqe->res != MSG_SIZE) { 380 + fprintf(stderr, "send failed cqe: %d\n", cqe->res); 381 + return 1; 382 + } 383 + if (cqe->res < 0) { 384 + fprintf(stderr, "bad send cqe res: %d\n", cqe->res); 385 + return 1; 386 + } 387 + bytes_needed -= cqe->res; 388 + io_uring_cqe_seen(ring, cqe); 389 + if (!bytes_needed) 390 + break; 391 
+ } 392 + 393 + return 0; 394 + } 395 + 396 + static int do_send(struct recv_data *rd) 397 + { 398 + struct sockaddr_in saddr; 399 + struct io_uring ring; 400 + unsigned long seq_buf[SEQ_SIZE], send_seq; 401 + struct io_uring_params p = { }; 402 + struct io_uring_buf_ring *br; 403 + int sockfd, ret, len, i; 404 + socklen_t optlen; 405 + void *buf, *ptr; 406 + 407 + ret = io_uring_queue_init_params(16, &ring, &p); 408 + if (ret) { 409 + fprintf(stderr, "queue init failed: %d\n", ret); 410 + return 1; 411 + } 412 + if (!(p.features & IORING_FEAT_RECVSEND_BUNDLE)) { 413 + no_send_mshot = 1; 414 + return 0; 415 + } 416 + 417 + if (posix_memalign(&buf, 4096, MSG_SIZE * nr_msgs)) 418 + return 1; 419 + 420 + br = io_uring_setup_buf_ring(&ring, nr_msgs, SEND_BGID, 0, &ret); 421 + if (!br) { 422 + if (ret == -EINVAL) { 423 + fprintf(stderr, "einval on br setup\n"); 424 + return 0; 425 + } 426 + fprintf(stderr, "failed setting up send ring %d\n", ret); 427 + return 1; 428 + } 429 + 430 + ptr = buf; 431 + for (i = 0; i < nr_msgs; i++) { 432 + io_uring_buf_ring_add(br, ptr, MSG_SIZE, i, nr_msgs - 1, i); 433 + ptr += MSG_SIZE; 434 + } 435 + io_uring_buf_ring_advance(br, nr_msgs); 436 + 437 + memset(&saddr, 0, sizeof(saddr)); 438 + saddr.sin_family = AF_INET; 439 + saddr.sin_port = htons(use_port); 440 + inet_pton(AF_INET, HOST, &saddr.sin_addr); 441 + 442 + if (use_tcp) 443 + sockfd = socket(AF_INET, SOCK_STREAM, 0); 444 + else 445 + sockfd = socket(AF_INET, SOCK_DGRAM, 0); 446 + if (sockfd < 0) { 447 + perror("socket"); 448 + goto err2; 449 + } 450 + 451 + pthread_barrier_wait(&rd->connect); 452 + 453 + ret = connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)); 454 + if (ret < 0) { 455 + perror("connect"); 456 + goto err; 457 + } 458 + 459 + pthread_barrier_wait(&rd->startup); 460 + 461 + optlen = sizeof(len); 462 + len = 1024 * MSG_SIZE; 463 + setsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &len, optlen); 464 + 465 + /* almost fill queue, leave room for one message */ 466 + 
send_seq = 0; 467 + rd->to_eagain = 0; 468 + while (rd->max_sends && rd->max_sends--) { 469 + for (i = 0; i < SEQ_SIZE; i++) 470 + seq_buf[i] = send_seq++; 471 + 472 + ret = send(sockfd, seq_buf, sizeof(seq_buf), MSG_DONTWAIT); 473 + if (ret < 0) { 474 + if (errno == EAGAIN) { 475 + send_seq -= SEQ_SIZE; 476 + break; 477 + } 478 + perror("send"); 479 + return 1; 480 + } else if (ret != sizeof(seq_buf)) { 481 + fprintf(stderr, "short %d send\n", ret); 482 + return 1; 483 + } 484 + 485 + rd->to_eagain++; 486 + rd->recv_bytes += sizeof(seq_buf); 487 + } 488 + 489 + ptr = buf; 490 + for (i = 0; i < nr_msgs; i++) { 491 + unsigned long *pseq = ptr; 492 + int j; 493 + 494 + for (j = 0; j < SEQ_SIZE; j++) 495 + pseq[j] = send_seq++; 496 + ptr += MSG_SIZE; 497 + } 498 + 499 + /* prepare more messages, sending with bundle */ 500 + rd->recv_bytes += (nr_msgs * MSG_SIZE); 501 + if (rd->send_bundle && use_tcp) 502 + ret = __do_send_bundle(rd, &ring, sockfd); 503 + else 504 + ret = __do_send(rd, &ring, sockfd); 505 + if (ret) 506 + goto err; 507 + 508 + pthread_barrier_wait(&rd->finish); 509 + 510 + close(sockfd); 511 + io_uring_queue_exit(&ring); 512 + return 0; 513 + 514 + err: 515 + close(sockfd); 516 + err2: 517 + io_uring_queue_exit(&ring); 518 + pthread_barrier_wait(&rd->finish); 519 + return 1; 520 + } 521 + 522 + static int test(int backlog, unsigned int max_sends, int *to_eagain, 523 + int send_bundle, int recv_bundle) 524 + { 525 + pthread_t recv_thread; 526 + struct recv_data rd; 527 + int ret; 528 + void *retval; 529 + 530 + /* backlog not reliable on UDP, skip it */ 531 + if ((backlog || max_sends) && !use_tcp) 532 + return T_EXIT_PASS; 533 + 534 + memset(&rd, 0, sizeof(rd)); 535 + pthread_barrier_init(&rd.connect, NULL, 2); 536 + pthread_barrier_init(&rd.startup, NULL, 2); 537 + pthread_barrier_init(&rd.barrier, NULL, 2); 538 + pthread_barrier_init(&rd.finish, NULL, 2); 539 + rd.max_sends = max_sends; 540 + if (to_eagain) 541 + *to_eagain = 0; 542 + 543 + 
rd.send_bundle = send_bundle; 544 + rd.recv_bundle = recv_bundle; 545 + 546 + ret = pthread_create(&recv_thread, NULL, recv_fn, &rd); 547 + if (ret) { 548 + fprintf(stderr, "Thread create failed: %d\n", ret); 549 + return 1; 550 + } 551 + 552 + ret = do_send(&rd); 553 + if (no_send_mshot) 554 + return 0; 555 + 556 + if (ret) 557 + return ret; 558 + 559 + pthread_join(recv_thread, &retval); 560 + if (to_eagain) 561 + *to_eagain = rd.to_eagain; 562 + return (intptr_t)retval; 563 + } 564 + 565 + static int run_tests(int is_udp) 566 + { 567 + int ret, eagain_hit; 568 + 569 + nr_msgs = NR_MIN_MSGS; 570 + 571 + /* test basic send bundle first */ 572 + ret = test(0, 0, NULL, 0, 0); 573 + if (ret) { 574 + fprintf(stderr, "test a failed\n"); 575 + return T_EXIT_FAIL; 576 + } 577 + if (no_send_mshot) 578 + return T_EXIT_SKIP; 579 + 580 + /* test recv bundle */ 581 + ret = test(0, 0, NULL, 0, 1); 582 + if (ret) { 583 + fprintf(stderr, "test b failed\n"); 584 + return T_EXIT_FAIL; 585 + } 586 + 587 + /* test bundling recv and send */ 588 + ret = test(0, 0, NULL, 1, 1); 589 + if (ret) { 590 + fprintf(stderr, "test c failed\n"); 591 + return T_EXIT_FAIL; 592 + } 593 + 594 + /* test bundling with full socket */ 595 + ret = test(1, 1000000, &eagain_hit, 1, 1); 596 + if (ret) { 597 + fprintf(stderr, "test d failed\n"); 598 + return T_EXIT_FAIL; 599 + } 600 + 601 + /* test bundling with almost full socket */ 602 + ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 1, 1); 603 + if (ret) { 604 + fprintf(stderr, "test e failed\n"); 605 + return T_EXIT_FAIL; 606 + } 607 + 608 + /* test recv bundle with almost full socket */ 609 + ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 0, 1); 610 + if (ret) { 611 + fprintf(stderr, "test f failed\n"); 612 + return T_EXIT_FAIL; 613 + } 614 + 615 + if (is_udp) 616 + return T_EXIT_PASS; 617 + 618 + /* test send bundle with almost full socket */ 619 + ret = test(1, eagain_hit - (nr_msgs / 2), &eagain_hit, 1, 0); 620 + if (ret) { 621 + fprintf(stderr, 
"test g failed\n"); 622 + return T_EXIT_FAIL; 623 + } 624 + 625 + /* now repeat the last three tests, but with > FAST_UIOV segments */ 626 + nr_msgs = NR_MAX_MSGS; 627 + 628 + /* test bundling with almost full socket */ 629 + ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 1, 1); 630 + if (ret) { 631 + fprintf(stderr, "test h failed\n"); 632 + return T_EXIT_FAIL; 633 + } 634 + 635 + /* test recv bundle with almost full socket */ 636 + ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 0, 1); 637 + if (ret) { 638 + fprintf(stderr, "test i failed\n"); 639 + return T_EXIT_FAIL; 640 + } 641 + 642 + /* test send bundle with almost full socket */ 643 + ret = test(1, eagain_hit - (nr_msgs / 2), &eagain_hit, 1, 0); 644 + if (ret) { 645 + fprintf(stderr, "test j failed\n"); 646 + return T_EXIT_FAIL; 647 + } 648 + 649 + return T_EXIT_PASS; 650 + } 651 + 652 + static int test_tcp(void) 653 + { 654 + int ret; 655 + 656 + use_tcp = 1; 657 + ret = run_tests(false); 658 + if (ret == T_EXIT_FAIL) 659 + fprintf(stderr, "TCP test case failed\n"); 660 + return ret; 661 + } 662 + 663 + static int test_udp(void) 664 + { 665 + int ret; 666 + 667 + use_tcp = 0; 668 + use_port++; 669 + ret = run_tests(true); 670 + if (ret == T_EXIT_FAIL) 671 + fprintf(stderr, "UDP test case failed\n"); 672 + return ret; 673 + } 674 + 675 + int main(int argc, char *argv[]) 676 + { 677 + int ret; 678 + 679 + if (argc > 1) 680 + return T_EXIT_SKIP; 681 + 682 + ret = test_tcp(); 683 + if (ret != T_EXIT_PASS) 684 + return ret; 685 + 686 + ret = test_udp(); 687 + if (ret != T_EXIT_PASS) 688 + return ret; 689 + 690 + return T_EXIT_PASS; 691 + }

+131

vendor/liburing/test/reg-fd-only.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Test io_uring_setup with IORING_SETUP_REGISTERED_FD_ONLY 4 + * 5 + */ 6 + #include <stdio.h> 7 + 8 + #include "helpers.h" 9 + 10 + #define NORMAL_PAGE_ENTRIES 8 11 + #define HUGE_PAGE_ENTRIES 512 12 + 13 + static int no_mmap; 14 + 15 + static int test_nops(struct io_uring *ring, int sq_size, int nr_nops) 16 + { 17 + struct io_uring_sqe *sqe; 18 + struct io_uring_cqe *cqe; 19 + int i, ret; 20 + 21 + do { 22 + int todo = nr_nops; 23 + 24 + if (todo > sq_size) 25 + todo = sq_size; 26 + 27 + for (i = 0; i < todo; i++) { 28 + sqe = io_uring_get_sqe(ring); 29 + io_uring_prep_nop(sqe); 30 + } 31 + 32 + ret = io_uring_submit(ring); 33 + if (ret != todo) { 34 + fprintf(stderr, "short submit %d\n", ret); 35 + return T_EXIT_FAIL; 36 + } 37 + 38 + for (i = 0; i < todo; i++) { 39 + ret = io_uring_wait_cqe(ring, &cqe); 40 + if (ret) { 41 + fprintf(stderr, "wait err %d\n", ret); 42 + return T_EXIT_FAIL; 43 + } 44 + io_uring_cqe_seen(ring, cqe); 45 + } 46 + nr_nops -= todo; 47 + } while (nr_nops); 48 + 49 + return T_EXIT_PASS; 50 + } 51 + 52 + static int test(int nentries) 53 + { 54 + struct io_uring ring; 55 + unsigned values[2]; 56 + int ret; 57 + 58 + ret = io_uring_queue_init(nentries, &ring, 59 + IORING_SETUP_REGISTERED_FD_ONLY | IORING_SETUP_NO_MMAP); 60 + if (ret == -EINVAL) { 61 + no_mmap = 1; 62 + return T_EXIT_SKIP; 63 + } else if (ret == -ENOMEM) { 64 + fprintf(stdout, "Enable huge pages to test big rings\n"); 65 + return T_EXIT_SKIP; 66 + } else if (ret) { 67 + fprintf(stderr, "ring setup failed\n"); 68 + return T_EXIT_FAIL; 69 + } 70 + 71 + ret = io_uring_register_ring_fd(&ring); 72 + if (ret != -EEXIST) { 73 + fprintf(stderr, "registering already-registered ring fd should fail\n"); 74 + goto err; 75 + } 76 + 77 + ret = io_uring_close_ring_fd(&ring); 78 + if (ret != -EBADF) { 79 + fprintf(stderr, "closing already-closed ring fd should fail\n"); 80 + goto err; 81 + } 82 + 83 + /* Test a simple io_uring_register 
operation expected to work. 84 + * io_uring_register_iowq_max_workers is arbitrary. 85 + */ 86 + values[0] = values[1] = 0; 87 + ret = io_uring_register_iowq_max_workers(&ring, values); 88 + if (ret || (values[0] == 0 && values[1] == 0)) { 89 + fprintf(stderr, "io_uring_register operation failed after closing ring fd\n"); 90 + goto err; 91 + } 92 + 93 + ret = test_nops(&ring, nentries, nentries * 4); 94 + if (ret) 95 + goto err; 96 + 97 + io_uring_queue_exit(&ring); 98 + return T_EXIT_PASS; 99 + 100 + err: 101 + io_uring_queue_exit(&ring); 102 + return T_EXIT_FAIL; 103 + } 104 + 105 + int main(int argc, char *argv[]) 106 + { 107 + int ret; 108 + 109 + if (argc > 1) 110 + return T_EXIT_SKIP; 111 + 112 + /* test single normal page */ 113 + ret = test(NORMAL_PAGE_ENTRIES); 114 + if (ret == T_EXIT_SKIP || no_mmap) { 115 + return T_EXIT_SKIP; 116 + } else if (ret != T_EXIT_PASS) { 117 + fprintf(stderr, "test 8 failed\n"); 118 + return T_EXIT_FAIL; 119 + } 120 + 121 + /* test with entries requiring a huge page */ 122 + ret = test(HUGE_PAGE_ENTRIES); 123 + if (ret == T_EXIT_SKIP) { 124 + return T_EXIT_SKIP; 125 + } else if (ret != T_EXIT_PASS) { 126 + fprintf(stderr, "test 512 failed\n"); 127 + return T_EXIT_FAIL; 128 + } 129 + 130 + return T_EXIT_PASS; 131 + }

+19 -5

vendor/liburing/test/ring-leak.c

··· 23 23 #include <linux/fs.h> 24 24 25 25 #include "liburing.h" 26 + #include "helpers.h" 26 27 #include "../src/syscall.h" 27 28 28 29 static int __io_uring_register_files(int ring_fd, int fd1, int fd2) ··· 48 49 return fd; 49 50 } 50 51 51 - static void send_fd(int socket, int fd) 52 + static int send_fd(int socket, int fd) 52 53 { 53 54 char buf[CMSG_SPACE(sizeof(fd))]; 54 55 struct cmsghdr *cmsg; ··· 69 70 70 71 msg.msg_controllen = CMSG_SPACE(sizeof(fd)); 71 72 72 - if (sendmsg(socket, &msg, 0) < 0) 73 + if (sendmsg(socket, &msg, 0) < 0) { 74 + if (errno == EINVAL) 75 + return T_EXIT_SKIP; 73 76 perror("sendmsg"); 77 + return T_EXIT_FAIL; 78 + } 79 + 80 + return T_EXIT_PASS; 74 81 } 75 82 76 83 static int test_iowq_request_cancel(void) ··· 166 173 perror("pipe"); 167 174 return -1; 168 175 } 169 - send_fd(sp[0], ring.ring_fd); 176 + ret = send_fd(sp[0], ring.ring_fd); 177 + if (ret != T_EXIT_PASS) 178 + return ret; 170 179 171 180 /* register an empty set for updates */ 172 181 if (update) { ··· 236 245 bool update = !!(i & 1); 237 246 238 247 ret = test_scm_cycles(update); 248 + if (ret == T_EXIT_SKIP) 249 + return T_EXIT_SKIP; 239 250 if (ret) { 240 251 fprintf(stderr, "test_scm_cycles() failed %i\n", 241 252 update); ··· 259 270 } 260 271 261 272 pid = fork(); 262 - if (pid) 263 - send_fd(sp[0], ring_fd); 273 + if (pid) { 274 + ret = send_fd(sp[0], ring_fd); 275 + if (ret != T_EXIT_PASS) 276 + return ret; 277 + } 264 278 265 279 close(ring_fd); 266 280 close(sp[0]);

+242

vendor/liburing/test/ringbuf-status.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: test reading provided ring buf head 4 + * 5 + */ 6 + #include <errno.h> 7 + #include <stdio.h> 8 + #include <unistd.h> 9 + #include <stdlib.h> 10 + #include <string.h> 11 + #include <fcntl.h> 12 + 13 + #include "liburing.h" 14 + #include "helpers.h" 15 + 16 + #define BUF_SIZE 32 17 + #define NR_BUFS 8 18 + #define FSIZE (BUF_SIZE * NR_BUFS) 19 + 20 + #define BR_MASK (NR_BUFS - 1) 21 + #define BGID 1 22 + 23 + static int no_buf_ring; 24 + static int no_buf_ring_status; 25 + 26 + static int test_max(void) 27 + { 28 + struct io_uring_buf_ring *br; 29 + struct io_uring ring; 30 + int nr_bufs = 32768; 31 + int ret, i; 32 + char *buf; 33 + 34 + ret = io_uring_queue_init(1, &ring, 0); 35 + if (ret) { 36 + fprintf(stderr, "ring setup failed: %d\n", ret); 37 + return 1; 38 + } 39 + 40 + if (posix_memalign((void **) &buf, 4096, FSIZE)) 41 + return 1; 42 + 43 + br = io_uring_setup_buf_ring(&ring, nr_bufs, BGID, 0, &ret); 44 + if (!br) { 45 + fprintf(stderr, "Buffer ring register failed %d\n", ret); 46 + return 1; 47 + } 48 + 49 + ret = io_uring_buf_ring_available(&ring, br, BGID); 50 + if (ret) { 51 + fprintf(stderr, "Bad available count %d\n", ret); 52 + return 1; 53 + } 54 + 55 + for (i = 0; i < nr_bufs / 2; i++) 56 + io_uring_buf_ring_add(br, buf, BUF_SIZE, i + 1, nr_bufs - 1, i); 57 + io_uring_buf_ring_advance(br, nr_bufs / 2); 58 + 59 + ret = io_uring_buf_ring_available(&ring, br, BGID); 60 + if (ret != nr_bufs / 2) { 61 + fprintf(stderr, "Bad half full available count %d\n", ret); 62 + return 1; 63 + } 64 + 65 + for (i = 0; i < nr_bufs / 2; i++) 66 + io_uring_buf_ring_add(br, buf, BUF_SIZE, i + 1, nr_bufs - 1, i); 67 + io_uring_buf_ring_advance(br, nr_bufs / 2); 68 + 69 + ret = io_uring_buf_ring_available(&ring, br, BGID); 70 + if (ret != nr_bufs) { 71 + fprintf(stderr, "Bad half full available count %d\n", ret); 72 + return 1; 73 + } 74 + 75 + free(buf); 76 + io_uring_queue_exit(&ring); 77 + return 
T_EXIT_PASS; 78 + } 79 + 80 + static int test(int invalid) 81 + { 82 + struct io_uring_sqe *sqe; 83 + struct io_uring_cqe *cqe; 84 + struct io_uring ring; 85 + struct io_uring_buf_ring *br; 86 + int ret, i, fds[2]; 87 + uint16_t head; 88 + char *buf; 89 + void *ptr; 90 + char output[16]; 91 + 92 + memset(output, 0x55, sizeof(output)); 93 + 94 + ret = io_uring_queue_init(NR_BUFS, &ring, 0); 95 + if (ret) { 96 + fprintf(stderr, "ring setup failed: %d\n", ret); 97 + return 1; 98 + } 99 + 100 + if (pipe(fds) < 0) { 101 + perror("pipe"); 102 + return T_EXIT_FAIL; 103 + } 104 + 105 + if (posix_memalign((void **) &buf, 4096, FSIZE)) 106 + return 1; 107 + 108 + br = io_uring_setup_buf_ring(&ring, NR_BUFS, BGID, 0, &ret); 109 + if (!br) { 110 + if (ret == -EINVAL) { 111 + no_buf_ring = 1; 112 + return 0; 113 + } 114 + fprintf(stderr, "Buffer ring register failed %d\n", ret); 115 + return 1; 116 + } 117 + 118 + ptr = buf; 119 + for (i = 0; i < NR_BUFS; i++) { 120 + io_uring_buf_ring_add(br, ptr, BUF_SIZE, i + 1, BR_MASK, i); 121 + ptr += BUF_SIZE; 122 + } 123 + io_uring_buf_ring_advance(br, NR_BUFS); 124 + 125 + /* head should be zero at this point */ 126 + head = 1; 127 + if (!invalid) 128 + ret = io_uring_buf_ring_head(&ring, BGID, &head); 129 + else 130 + ret = io_uring_buf_ring_head(&ring, BGID + 10, &head); 131 + if (ret) { 132 + if (ret == -EINVAL) { 133 + no_buf_ring_status = 1; 134 + return T_EXIT_SKIP; 135 + } 136 + if (invalid && ret == -ENOENT) 137 + return T_EXIT_PASS; 138 + fprintf(stderr, "buf_ring_head: %d\n", ret); 139 + return T_EXIT_FAIL; 140 + } 141 + if (invalid) { 142 + fprintf(stderr, "lookup of bad group id succeeded\n"); 143 + return T_EXIT_FAIL; 144 + } 145 + if (head != 0) { 146 + fprintf(stderr, "bad head %d\n", head); 147 + return T_EXIT_FAIL; 148 + } 149 + 150 + ret = io_uring_buf_ring_available(&ring, br, BGID); 151 + if (ret != NR_BUFS) { 152 + fprintf(stderr, "ring available %d\n", ret); 153 + return T_EXIT_FAIL; 154 + } 155 + 156 + sqe = 
io_uring_get_sqe(&ring); 157 + io_uring_prep_read(sqe, fds[0], NULL, BUF_SIZE, i * BUF_SIZE); 158 + sqe->buf_group = BGID; 159 + sqe->flags |= IOSQE_BUFFER_SELECT; 160 + sqe->user_data = 1; 161 + 162 + ret = io_uring_submit(&ring); 163 + if (ret != 1) { 164 + fprintf(stderr, "submit: %d\n", ret); 165 + return T_EXIT_FAIL; 166 + } 167 + 168 + /* head should still be zero at this point, no buffers consumed */ 169 + head = 1; 170 + ret = io_uring_buf_ring_head(&ring, BGID, &head); 171 + if (head != 0) { 172 + fprintf(stderr, "bad head after submit %d\n", head); 173 + return T_EXIT_FAIL; 174 + } 175 + 176 + ret = write(fds[1], output, sizeof(output)); 177 + if (ret != sizeof(output)) { 178 + fprintf(stderr, "pipe buffer write %d\n", ret); 179 + return T_EXIT_FAIL; 180 + } 181 + 182 + ret = io_uring_wait_cqe(&ring, &cqe); 183 + if (ret) { 184 + fprintf(stderr, "wait cqe failed %d\n", ret); 185 + return T_EXIT_FAIL; 186 + } 187 + if (cqe->res != sizeof(output)) { 188 + fprintf(stderr, "cqe res %d\n", cqe->res); 189 + return T_EXIT_FAIL; 190 + } 191 + if (!(cqe->flags & IORING_CQE_F_BUFFER)) { 192 + fprintf(stderr, "no buffer selected\n"); 193 + return T_EXIT_FAIL; 194 + } 195 + io_uring_cqe_seen(&ring, cqe); 196 + 197 + /* head should now be one, we consumed a buffer */ 198 + ret = io_uring_buf_ring_head(&ring, BGID, &head); 199 + if (head != 1) { 200 + fprintf(stderr, "bad head after cqe %d\n", head); 201 + return T_EXIT_FAIL; 202 + } 203 + 204 + ret = io_uring_buf_ring_available(&ring, br, BGID); 205 + if (ret != NR_BUFS - 1) { 206 + fprintf(stderr, "ring available %d\n", ret); 207 + return T_EXIT_FAIL; 208 + } 209 + 210 + close(fds[0]); 211 + close(fds[1]); 212 + free(buf); 213 + io_uring_queue_exit(&ring); 214 + return T_EXIT_PASS; 215 + } 216 + 217 + int main(int argc, char *argv[]) 218 + { 219 + int ret; 220 + 221 + ret = test(0); 222 + if (ret == T_EXIT_FAIL) { 223 + fprintf(stderr, "test 0 failed\n"); 224 + return T_EXIT_FAIL; 225 + } 226 + if (no_buf_ring || 
no_buf_ring_status) 227 + return T_EXIT_SKIP; 228 + 229 + ret = test(1); 230 + if (ret == T_EXIT_FAIL) { 231 + fprintf(stderr, "test 1 failed\n"); 232 + return T_EXIT_FAIL; 233 + } 234 + 235 + ret = test_max(); 236 + if (ret == T_EXIT_FAIL) { 237 + fprintf(stderr, "test_max failed\n"); 238 + return T_EXIT_FAIL; 239 + } 240 + 241 + return T_EXIT_PASS; 242 + }

+232 -116

vendor/liburing/test/send-zerocopy.c

··· 68 68 static size_t page_sz; 69 69 static char *tx_buffer, *rx_buffer; 70 70 static struct iovec buffers_iov[__BUF_NR]; 71 + 72 + static bool has_sendzc; 71 73 static bool has_sendmsg; 74 + static bool hit_enomem; 75 + 76 + static int probe_zc_support(void) 77 + { 78 + struct io_uring ring; 79 + struct io_uring_probe *p; 80 + int ret; 81 + 82 + has_sendzc = has_sendmsg = false; 83 + 84 + ret = io_uring_queue_init(1, &ring, 0); 85 + if (ret) 86 + return -1; 87 + 88 + p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op)); 89 + if (!p) 90 + return -1; 91 + 92 + ret = io_uring_register_probe(&ring, p, 256); 93 + if (ret) 94 + return -1; 95 + 96 + has_sendzc = p->ops_len > IORING_OP_SEND_ZC; 97 + has_sendmsg = p->ops_len > IORING_OP_SENDMSG_ZC; 98 + io_uring_queue_exit(&ring); 99 + free(p); 100 + return 0; 101 + } 72 102 73 103 static bool check_cq_empty(struct io_uring *ring) 74 104 { ··· 98 128 99 129 ret = io_uring_wait_cqe(ring, &cqe); 100 130 assert(!ret && cqe->user_data == 1); 101 - if (cqe->res == -EINVAL) { 102 - assert(!(cqe->flags & IORING_CQE_F_MORE)); 103 - return T_EXIT_SKIP; 104 - } else if (cqe->res != payload_size) { 131 + if (cqe->res != payload_size) { 105 132 fprintf(stderr, "send failed %i\n", cqe->res); 106 133 return T_EXIT_FAIL; 107 134 } ··· 122 149 return T_EXIT_PASS; 123 150 } 124 151 152 + static int test_send_faults_check(struct io_uring *ring, int expected) 153 + { 154 + struct io_uring_cqe *cqe; 155 + int ret, nr_cqes = 0; 156 + bool more = true; 157 + 158 + while (more) { 159 + nr_cqes++; 160 + ret = io_uring_wait_cqe(ring, &cqe); 161 + assert(!ret); 162 + assert(cqe->user_data == 1); 163 + 164 + if (nr_cqes == 1 && (cqe->flags & IORING_CQE_F_NOTIF)) { 165 + fprintf(stderr, "test_send_faults_check notif came first\n"); 166 + return -1; 167 + } 168 + 169 + if (!(cqe->flags & IORING_CQE_F_NOTIF)) { 170 + if (cqe->res != expected) { 171 + fprintf(stderr, "invalid cqe res %i vs expected %i, " 172 + "user_data %i\n", 173 + 
cqe->res, expected, (int)cqe->user_data); 174 + return -1; 175 + } 176 + } else { 177 + if (cqe->res != 0 || cqe->flags != IORING_CQE_F_NOTIF) { 178 + fprintf(stderr, "invalid notif cqe %i %i\n", 179 + cqe->res, cqe->flags); 180 + return -1; 181 + } 182 + } 183 + 184 + more = cqe->flags & IORING_CQE_F_MORE; 185 + io_uring_cqe_seen(ring, cqe); 186 + } 187 + 188 + if (nr_cqes > 2) { 189 + fprintf(stderr, "test_send_faults_check() too many CQEs %i\n", 190 + nr_cqes); 191 + return -1; 192 + } 193 + assert(check_cq_empty(ring)); 194 + return 0; 195 + } 196 + 125 197 static int test_send_faults(int sock_tx, int sock_rx) 126 198 { 127 199 struct io_uring_sqe *sqe; 128 - struct io_uring_cqe *cqe; 129 200 int msg_flags = 0; 130 201 unsigned zc_flags = 0; 131 - int payload_size = 100; 132 - int ret, i, nr_cqes, nr_reqs = 3; 202 + int ret, payload_size = 100; 133 203 struct io_uring ring; 134 204 135 - ret = io_uring_queue_init(32, &ring, IORING_SETUP_SUBMIT_ALL); 205 + ret = io_uring_queue_init(32, &ring, 0); 136 206 if (ret) { 137 207 fprintf(stderr, "queue init failed: %d\n", ret); 138 208 return -1; ··· 143 213 io_uring_prep_send_zc(sqe, sock_tx, (void *)1UL, payload_size, 144 214 msg_flags, zc_flags); 145 215 sqe->user_data = 1; 216 + ret = io_uring_submit(&ring); 217 + assert(ret == 1); 218 + 219 + ret = test_send_faults_check(&ring, -EFAULT); 220 + if (ret) { 221 + fprintf(stderr, "test_send_faults with invalid buf failed\n"); 222 + return -1; 223 + } 146 224 147 225 /* invalid address */ 148 226 sqe = io_uring_get_sqe(&ring); ··· 150 228 msg_flags, zc_flags); 151 229 io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)1UL, 152 230 sizeof(struct sockaddr_in6)); 153 - sqe->user_data = 2; 231 + sqe->user_data = 1; 232 + ret = io_uring_submit(&ring); 233 + assert(ret == 1); 234 + 235 + ret = test_send_faults_check(&ring, -EFAULT); 236 + if (ret) { 237 + fprintf(stderr, "test_send_faults with invalid addr failed\n"); 238 + return -1; 239 + } 154 240 155 241 /* 
invalid send/recv flags */ 156 242 sqe = io_uring_get_sqe(&ring); 157 243 io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size, 158 244 msg_flags, ~0U); 159 - sqe->user_data = 3; 160 - 245 + sqe->user_data = 1; 161 246 ret = io_uring_submit(&ring); 162 - assert(ret == nr_reqs); 163 - 164 - nr_cqes = nr_reqs; 165 - for (i = 0; i < nr_cqes; i++) { 166 - ret = io_uring_wait_cqe(&ring, &cqe); 167 - assert(!ret); 168 - assert(cqe->user_data <= nr_reqs); 247 + assert(ret == 1); 169 248 170 - if (!(cqe->flags & IORING_CQE_F_NOTIF)) { 171 - int expected = (cqe->user_data == 3) ? -EINVAL : -EFAULT; 249 + ret = test_send_faults_check(&ring, -EINVAL); 250 + if (ret) { 251 + fprintf(stderr, "test_send_faults with invalid flags failed\n"); 252 + return -1; 253 + } 172 254 173 - if (cqe->res != expected) { 174 - fprintf(stderr, "invalid cqe res %i vs expected %i, " 175 - "user_data %i\n", 176 - cqe->res, expected, (int)cqe->user_data); 177 - return -1; 178 - } 179 - if (cqe->flags & IORING_CQE_F_MORE) 180 - nr_cqes++; 181 - } else { 182 - if (cqe->res != 0 || cqe->flags != IORING_CQE_F_NOTIF) { 183 - fprintf(stderr, "invalid notif cqe %i %i\n", 184 - cqe->res, cqe->flags); 185 - return -1; 186 - } 187 - } 188 - io_uring_cqe_seen(&ring, cqe); 189 - } 190 - assert(check_cq_empty(&ring)); 191 255 return T_EXIT_PASS; 192 256 } 193 257 ··· 278 342 #ifdef SO_ZEROCOPY 279 343 int val = 1; 280 344 345 + /* 346 + * NOTE: apps must not set SO_ZEROCOPY when using io_uring zc. 347 + * It's only here to test interactions with MSG_ZEROCOPY. 348 + */ 281 349 if (setsockopt(*sock_client, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) { 282 350 perror("setsockopt zc"); 283 351 return 1; ··· 469 537 if (cqe->user_data == nr_reqs - 1) 470 538 expected = chunk_size_last; 471 539 if (cqe->res != expected) { 540 + if (cqe->res == -ENOMEM) { 541 + if (!hit_enomem) { 542 + fprintf(stderr, "Hit -ENOMEM. " 543 + "Increase ulimit -l " 544 + "limit for a complete " 545 + "test run. 
Skipping " 546 + "parts.\n"); 547 + hit_enomem = 1; 548 + } 549 + return 0; 550 + } 472 551 fprintf(stderr, "invalid cqe->res %d expected %d\n", 473 552 cqe->res, expected); 474 553 return 1; ··· 659 738 return 0; 660 739 } 661 740 662 - static bool io_check_zc_sendmsg(struct io_uring *ring) 663 - { 664 - struct io_uring_probe *p; 665 - int ret; 666 - 667 - p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op)); 668 - if (!p) { 669 - fprintf(stderr, "probe allocation failed\n"); 670 - return false; 671 - } 672 - ret = io_uring_register_probe(ring, p, 256); 673 - if (ret) 674 - return false; 675 - return p->ops_len > IORING_OP_SENDMSG_ZC; 676 - } 677 - 678 741 /* see also send_recv.c:test_invalid */ 679 742 static int test_invalid_zc(int fds[2]) 680 743 { ··· 718 781 return 0; 719 782 } 720 783 721 - int main(int argc, char *argv[]) 784 + static int run_basic_tests(void) 722 785 { 723 786 struct sockaddr_storage addr; 724 - struct io_uring ring; 725 - int i, ret, sp[2]; 787 + int ret, i, sp[2]; 788 + 789 + /* create TCP IPv6 pair */ 790 + ret = create_socketpair_ip(&addr, &sp[0], &sp[1], true, true, false, true); 791 + if (ret) { 792 + fprintf(stderr, "sock prep failed %d\n", ret); 793 + return -1; 794 + } 795 + 796 + for (i = 0; i < 2; i++) { 797 + struct io_uring ring; 798 + unsigned ring_flags = 0; 799 + 800 + if (i & 1) 801 + ring_flags |= IORING_SETUP_DEFER_TASKRUN; 802 + 803 + ret = io_uring_queue_init(32, &ring, ring_flags); 804 + if (ret) { 805 + if (ret == -EINVAL) 806 + continue; 807 + fprintf(stderr, "queue init failed: %d\n", ret); 808 + return -1; 809 + } 810 + 811 + ret = test_basic_send(&ring, sp[0], sp[1]); 812 + if (ret) { 813 + fprintf(stderr, "test_basic_send() failed\n"); 814 + return -1; 815 + } 816 + 817 + ret = test_send_faults(sp[0], sp[1]); 818 + if (ret) { 819 + fprintf(stderr, "test_send_faults() failed\n"); 820 + return -1; 821 + } 822 + 823 + ret = test_invalid_zc(sp); 824 + if (ret) { 825 + fprintf(stderr, 
"test_invalid_zc() failed\n"); 826 + return -1; 827 + } 828 + 829 + ret = test_async_addr(&ring); 830 + if (ret) { 831 + fprintf(stderr, "test_async_addr() failed\n"); 832 + return T_EXIT_FAIL; 833 + } 834 + 835 + io_uring_queue_exit(&ring); 836 + } 837 + 838 + close(sp[0]); 839 + close(sp[1]); 840 + return 0; 841 + } 842 + 843 + int main(int argc, char *argv[]) 844 + { 726 845 size_t len; 846 + int ret, i; 727 847 728 848 if (argc > 1) 729 849 return T_EXIT_SKIP; 730 850 731 - page_sz = sysconf(_SC_PAGESIZE); 732 - 733 - /* create TCP IPv6 pair */ 734 - ret = create_socketpair_ip(&addr, &sp[0], &sp[1], true, true, false, true); 851 + ret = probe_zc_support(); 735 852 if (ret) { 736 - fprintf(stderr, "sock prep failed %d\n", ret); 853 + printf("probe failed\n"); 737 854 return T_EXIT_FAIL; 738 855 } 856 + if (!has_sendzc) { 857 + printf("no IORING_OP_SEND_ZC support, skip\n"); 858 + return T_EXIT_SKIP; 859 + } 860 + 861 + page_sz = sysconf(_SC_PAGESIZE); 739 862 740 863 len = LARGE_BUF_SIZE; 741 864 tx_buffer = aligned_alloc(page_sz, len); ··· 786 909 } 787 910 } 788 911 789 - ret = io_uring_queue_init(32, &ring, 0); 790 - if (ret) { 791 - fprintf(stderr, "queue init failed: %d\n", ret); 792 - return T_EXIT_FAIL; 793 - } 794 - 795 - ret = test_basic_send(&ring, sp[0], sp[1]); 796 - if (ret == T_EXIT_SKIP) 797 - return ret; 798 - if (ret) { 799 - fprintf(stderr, "test_basic_send() failed\n"); 912 + ret = run_basic_tests(); 913 + if (ret) 800 914 return T_EXIT_FAIL; 801 - } 802 915 803 - has_sendmsg = io_check_zc_sendmsg(&ring); 916 + for (i = 0; i < 2; i++) { 917 + struct io_uring ring; 918 + unsigned ring_flags = 0; 804 919 805 - ret = test_send_faults(sp[0], sp[1]); 806 - if (ret) { 807 - fprintf(stderr, "test_send_faults() failed\n"); 808 - return T_EXIT_FAIL; 809 - } 920 + if (i & 1) 921 + ring_flags |= IORING_SETUP_SINGLE_ISSUER | 922 + IORING_SETUP_DEFER_TASKRUN; 810 923 811 - ret = test_invalid_zc(sp); 812 - if (ret) { 813 - fprintf(stderr, "test_invalid_zc() 
failed\n"); 814 - return T_EXIT_FAIL; 815 - } 924 + ret = io_uring_queue_init(32, &ring, ring_flags); 925 + if (ret) { 926 + if (ret == -EINVAL) 927 + continue; 928 + fprintf(stderr, "queue init failed: %d\n", ret); 929 + return -1; 930 + } 816 931 817 - close(sp[0]); 818 - close(sp[1]); 932 + ret = t_register_buffers(&ring, buffers_iov, ARRAY_SIZE(buffers_iov)); 933 + if (ret == T_SETUP_SKIP) { 934 + fprintf(stderr, "can't register bufs, skip\n"); 935 + goto out; 936 + } else if (ret != T_SETUP_OK) { 937 + fprintf(stderr, "buffer registration failed %i\n", ret); 938 + return T_EXIT_FAIL; 939 + } 819 940 820 - ret = test_async_addr(&ring); 821 - if (ret) { 822 - fprintf(stderr, "test_async_addr() failed\n"); 823 - return T_EXIT_FAIL; 824 - } 941 + if (buffers_iov[BUF_T_HUGETLB].iov_base) { 942 + buffers_iov[BUF_T_HUGETLB].iov_base += 13; 943 + buffers_iov[BUF_T_HUGETLB].iov_len -= 26; 944 + } 945 + if (buffers_iov[BUF_T_LARGE].iov_base) { 946 + buffers_iov[BUF_T_LARGE].iov_base += 13; 947 + buffers_iov[BUF_T_LARGE].iov_len -= 26; 948 + } 825 949 826 - ret = t_register_buffers(&ring, buffers_iov, ARRAY_SIZE(buffers_iov)); 827 - if (ret == T_SETUP_SKIP) { 828 - fprintf(stderr, "can't register bufs, skip\n"); 829 - goto out; 830 - } else if (ret != T_SETUP_OK) { 831 - fprintf(stderr, "buffer registration failed %i\n", ret); 832 - return T_EXIT_FAIL; 833 - } 950 + ret = test_inet_send(&ring); 951 + if (ret) { 952 + fprintf(stderr, "test_inet_send() failed (defer_taskrun %i)\n", 953 + ring_flags & IORING_SETUP_DEFER_TASKRUN); 954 + return T_EXIT_FAIL; 955 + } 834 956 835 - if (buffers_iov[BUF_T_HUGETLB].iov_base) { 836 - buffers_iov[BUF_T_HUGETLB].iov_base += 13; 837 - buffers_iov[BUF_T_HUGETLB].iov_len -= 26; 838 - } 839 - if (buffers_iov[BUF_T_LARGE].iov_base) { 840 - buffers_iov[BUF_T_LARGE].iov_base += 13; 841 - buffers_iov[BUF_T_LARGE].iov_len -= 26; 957 + if (buffers_iov[BUF_T_HUGETLB].iov_base) { 958 + buffers_iov[BUF_T_HUGETLB].iov_base -= 13; 959 + 
buffers_iov[BUF_T_HUGETLB].iov_len += 26; 960 + } 961 + if (buffers_iov[BUF_T_LARGE].iov_base) { 962 + buffers_iov[BUF_T_LARGE].iov_base -= 13; 963 + buffers_iov[BUF_T_LARGE].iov_len += 26; 964 + } 965 + out: 966 + io_uring_queue_exit(&ring); 842 967 } 843 968 844 - ret = test_inet_send(&ring); 845 - if (ret) { 846 - fprintf(stderr, "test_inet_send() failed\n"); 847 - return T_EXIT_FAIL; 848 - } 849 - out: 850 - io_uring_queue_exit(&ring); 851 - close(sp[0]); 852 - close(sp[1]); 853 969 return T_EXIT_PASS; 854 970 }

+91 -15

vendor/liburing/test/send_recv.c

··· 23 23 #define HOST "127.0.0.1" 24 24 25 25 static int recv_prep(struct io_uring *ring, struct iovec *iov, int *sock, 26 - int registerfiles) 26 + int registerfiles, int async, int provide) 27 27 { 28 28 struct sockaddr_in saddr; 29 29 struct io_uring_sqe *sqe; ··· 64 64 io_uring_prep_recv(sqe, use_fd, iov->iov_base, iov->iov_len, 0); 65 65 if (registerfiles) 66 66 sqe->flags |= IOSQE_FIXED_FILE; 67 + if (async) 68 + sqe->flags |= IOSQE_ASYNC; 69 + if (provide) 70 + sqe->flags |= IOSQE_BUFFER_SELECT; 67 71 sqe->user_data = 2; 68 72 69 73 ret = io_uring_submit(ring); ··· 79 83 return 1; 80 84 } 81 85 82 - static int do_recv(struct io_uring *ring, struct iovec *iov) 86 + static int do_recv(struct io_uring *ring, struct iovec *iov, int enobufs) 83 87 { 84 88 struct io_uring_cqe *cqe; 85 89 int ret; ··· 87 91 ret = io_uring_wait_cqe(ring, &cqe); 88 92 if (ret) { 89 93 fprintf(stdout, "wait_cqe: %d\n", ret); 90 - goto err; 94 + return 1; 91 95 } 92 96 if (cqe->res == -EINVAL) { 93 97 fprintf(stdout, "recv not supported, skipping\n"); 94 - return 0; 98 + goto out; 99 + } 100 + if (cqe->res == -ENOBUFS && enobufs) { 101 + if (cqe->flags & IORING_CQE_F_SOCK_NONEMPTY) { 102 + fprintf(stdout, "NONEMPTY set on -ENOBUFS\n"); 103 + goto err; 104 + } 105 + goto out; 95 106 } 96 107 if (cqe->res < 0) { 97 108 fprintf(stderr, "failed cqe: %d\n", cqe->res); ··· 109 120 goto err; 110 121 } 111 122 123 + out: 124 + io_uring_cqe_seen(ring, cqe); 112 125 return 0; 113 126 err: 127 + io_uring_cqe_seen(ring, cqe); 114 128 return 1; 115 129 } 116 130 ··· 118 132 pthread_mutex_t mutex; 119 133 int use_sqthread; 120 134 int registerfiles; 135 + int async; 136 + int provide; 121 137 }; 122 138 123 139 static void *recv_fn(void *data) ··· 152 168 } 153 169 } 154 170 155 - ret = recv_prep(&ring, &iov, &sock, rd->registerfiles); 171 + ret = recv_prep(&ring, &iov, &sock, rd->registerfiles, rd->async, 172 + rd->provide); 156 173 if (ret) { 157 174 fprintf(stderr, "recv_prep failed: %d\n", 
ret); 158 175 goto err; 159 176 } 160 177 pthread_mutex_unlock(&rd->mutex); 161 - ret = do_recv(&ring, &iov); 178 + ret = do_recv(&ring, &iov, rd->provide); 162 179 163 180 close(sock); 164 181 io_uring_queue_exit(&ring); ··· 232 249 return 1; 233 250 } 234 251 235 - static int test(int use_sqthread, int regfiles) 252 + static int test(int use_sqthread, int regfiles, int async, int provide) 236 253 { 237 254 pthread_mutexattr_t attr; 238 255 pthread_t recv_thread; ··· 246 263 pthread_mutex_lock(&rd.mutex); 247 264 rd.use_sqthread = use_sqthread; 248 265 rd.registerfiles = regfiles; 266 + rd.async = async; 267 + rd.provide = provide; 249 268 250 269 ret = pthread_create(&recv_thread, NULL, recv_fn, &rd); 251 270 if (ret) { ··· 268 287 struct io_uring_cqe *cqe; 269 288 struct io_uring_sqe *sqe; 270 289 271 - ret = t_create_ring(8, &ring, 0); 272 - if (ret) 290 + ret = t_create_ring(8, &ring, IORING_SETUP_SUBMIT_ALL); 291 + if (ret) { 292 + if (ret == -EINVAL) 293 + return 0; 273 294 return ret; 295 + } 274 296 275 297 ret = t_create_socket_pair(fds, true); 276 298 if (ret) ··· 314 336 return ret; 315 337 } 316 338 317 - ret = test(0, 0); 339 + ret = test(0, 0, 1, 1); 318 340 if (ret) { 319 - fprintf(stderr, "test sqthread=0 failed\n"); 341 + fprintf(stderr, "test sqthread=0 1 1 failed\n"); 320 342 return ret; 321 343 } 322 344 323 - ret = test(1, 1); 345 + ret = test(1, 1, 1, 1); 324 346 if (ret) { 325 - fprintf(stderr, "test sqthread=1 reg=1 failed\n"); 347 + fprintf(stderr, "test sqthread=1 reg=1 1 1 failed\n"); 326 348 return ret; 327 349 } 328 350 329 - ret = test(1, 0); 351 + ret = test(1, 0, 1, 1); 330 352 if (ret) { 331 - fprintf(stderr, "test sqthread=1 reg=0 failed\n"); 353 + fprintf(stderr, "test sqthread=1 reg=0 1 1 failed\n"); 354 + return ret; 355 + } 356 + 357 + ret = test(0, 0, 0, 1); 358 + if (ret) { 359 + fprintf(stderr, "test sqthread=0 0 1 failed\n"); 360 + return ret; 361 + } 362 + 363 + ret = test(1, 1, 0, 1); 364 + if (ret) { 365 + 
fprintf(stderr, "test sqthread=1 reg=1 0 1 failed\n"); 366 + return ret; 367 + } 368 + 369 + ret = test(1, 0, 0, 1); 370 + if (ret) { 371 + fprintf(stderr, "test sqthread=1 reg=0 0 1 failed\n"); 372 + return ret; 373 + } 374 + 375 + ret = test(0, 0, 1, 0); 376 + if (ret) { 377 + fprintf(stderr, "test sqthread=0 0 1 failed\n"); 378 + return ret; 379 + } 380 + 381 + ret = test(1, 1, 1, 0); 382 + if (ret) { 383 + fprintf(stderr, "test sqthread=1 reg=1 1 0 failed\n"); 384 + return ret; 385 + } 386 + 387 + ret = test(1, 0, 1, 0); 388 + if (ret) { 389 + fprintf(stderr, "test sqthread=1 reg=0 1 0 failed\n"); 390 + return ret; 391 + } 392 + 393 + ret = test(0, 0, 0, 0); 394 + if (ret) { 395 + fprintf(stderr, "test sqthread=0 0 0 failed\n"); 396 + return ret; 397 + } 398 + 399 + ret = test(1, 1, 0, 0); 400 + if (ret) { 401 + fprintf(stderr, "test sqthread=1 reg=1 0 0 failed\n"); 402 + return ret; 403 + } 404 + 405 + ret = test(1, 0, 0, 0); 406 + if (ret) { 407 + fprintf(stderr, "test sqthread=1 reg=0 0 0 failed\n"); 332 408 return ret; 333 409 } 334 410

-200

vendor/liburing/test/sendmsg_fs_cve.c

··· 1 - /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 - /* 3 - * repro-CVE-2020-29373 -- Reproducer for CVE-2020-29373. 4 - * 5 - * Copyright (c) 2021 SUSE 6 - * Author: Nicolai Stange <nstange@suse.de> 7 - * 8 - * This program is free software; you can redistribute it and/or 9 - * modify it under the terms of the GNU General Public License 10 - * as published by the Free Software Foundation; either version 2 11 - * of the License, or (at your option) any later version. 12 - * 13 - * This program is distributed in the hope that it will be useful, 14 - * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 - * GNU General Public License for more details. 17 - * 18 - * You should have received a copy of the GNU General Public License 19 - * along with this program; if not, see <https://www.gnu.org/licenses/>. 20 - */ 21 - 22 - #include <unistd.h> 23 - #include <stdio.h> 24 - #include <string.h> 25 - #include <sys/mman.h> 26 - #include <sys/socket.h> 27 - #include <sys/un.h> 28 - #include <fcntl.h> 29 - #include <errno.h> 30 - #include <inttypes.h> 31 - #include <stdlib.h> 32 - #include <sys/types.h> 33 - #include <sys/wait.h> 34 - #include "liburing.h" 35 - 36 - /* 37 - * This attempts to make the kernel issue a sendmsg() to 38 - * path from io_uring's async io_sq_wq_submit_work(). 39 - * 40 - * Unfortunately, IOSQE_ASYNC is available only from kernel version 41 - * 5.6 onwards. To still force io_uring to process the request 42 - * asynchronously from io_sq_wq_submit_work(), queue a couple of 43 - * auxiliary requests all failing with EAGAIN before. This is 44 - * implemented by writing repeatedly to an auxiliary O_NONBLOCK 45 - * AF_UNIX socketpair with a small SO_SNDBUF. 
46 - */ 47 - static int try_sendmsg_async(const char * const path) 48 - { 49 - int snd_sock, r; 50 - struct io_uring ring; 51 - char sbuf[16] = {}; 52 - struct iovec siov = { .iov_base = &sbuf, .iov_len = sizeof(sbuf) }; 53 - struct sockaddr_un addr = {}; 54 - struct msghdr msg = { 55 - .msg_name = &addr, 56 - .msg_namelen = sizeof(addr), 57 - .msg_iov = &siov, 58 - .msg_iovlen = 1, 59 - }; 60 - struct io_uring_cqe *cqe; 61 - struct io_uring_sqe *sqe; 62 - 63 - snd_sock = socket(AF_UNIX, SOCK_DGRAM, 0); 64 - if (snd_sock < 0) { 65 - perror("socket(AF_UNIX)"); 66 - return -1; 67 - } 68 - 69 - addr.sun_family = AF_UNIX; 70 - strcpy(addr.sun_path, path); 71 - 72 - r = io_uring_queue_init(512, &ring, 0); 73 - if (r < 0) { 74 - fprintf(stderr, "ring setup failed: %d\n", r); 75 - goto close_iour; 76 - } 77 - 78 - sqe = io_uring_get_sqe(&ring); 79 - if (!sqe) { 80 - fprintf(stderr, "get sqe failed\n"); 81 - r = -EFAULT; 82 - goto close_iour; 83 - } 84 - 85 - /* the actual one supposed to fail with -ENOENT. 
*/ 86 - io_uring_prep_sendmsg(sqe, snd_sock, &msg, 0); 87 - sqe->flags = IOSQE_ASYNC; 88 - sqe->user_data = 255; 89 - 90 - r = io_uring_submit(&ring); 91 - if (r != 1) { 92 - fprintf(stderr, "sqe submit failed: %d\n", r); 93 - r = -EFAULT; 94 - goto close_iour; 95 - } 96 - 97 - r = io_uring_wait_cqe(&ring, &cqe); 98 - if (r < 0) { 99 - fprintf(stderr, "wait completion %d\n", r); 100 - r = -EFAULT; 101 - goto close_iour; 102 - } 103 - if (cqe->user_data != 255) { 104 - fprintf(stderr, "user data %d\n", r); 105 - r = -EFAULT; 106 - goto close_iour; 107 - } 108 - if (cqe->res != -ENOENT) { 109 - r = 3; 110 - fprintf(stderr, 111 - "error: cqe %i: res=%i, but expected -ENOENT\n", 112 - (int)cqe->user_data, (int)cqe->res); 113 - } 114 - io_uring_cqe_seen(&ring, cqe); 115 - 116 - close_iour: 117 - io_uring_queue_exit(&ring); 118 - close(snd_sock); 119 - return r; 120 - } 121 - 122 - int main(int argc, char *argv[]) 123 - { 124 - int r; 125 - char tmpdir[] = "/tmp/tmp.XXXXXX"; 126 - int rcv_sock; 127 - struct sockaddr_un addr = {}; 128 - pid_t c; 129 - int wstatus; 130 - 131 - if (!mkdtemp(tmpdir)) { 132 - perror("mkdtemp()"); 133 - return 1; 134 - } 135 - 136 - rcv_sock = socket(AF_UNIX, SOCK_DGRAM, 0); 137 - if (rcv_sock < 0) { 138 - perror("socket(AF_UNIX)"); 139 - r = 1; 140 - goto rmtmpdir; 141 - } 142 - 143 - addr.sun_family = AF_UNIX; 144 - snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/sock", tmpdir); 145 - 146 - r = bind(rcv_sock, (struct sockaddr *)&addr, 147 - sizeof(addr)); 148 - if (r < 0) { 149 - perror("bind()"); 150 - close(rcv_sock); 151 - r = 1; 152 - goto rmtmpdir; 153 - } 154 - 155 - c = fork(); 156 - if (!c) { 157 - close(rcv_sock); 158 - 159 - r = chroot(tmpdir); 160 - if (r) { 161 - if (errno == EPERM) { 162 - fprintf(stderr, "chroot not allowed, skip\n"); 163 - return 0; 164 - } 165 - 166 - perror("chroot()"); 167 - return 1; 168 - } 169 - 170 - r = try_sendmsg_async(addr.sun_path); 171 - if (r < 0) { 172 - /* system call failure */ 173 - r = 1; 
174 - } else if (r) { 175 - /* test case failure */ 176 - r += 1; 177 - } 178 - return r; 179 - } 180 - 181 - if (waitpid(c, &wstatus, 0) == (pid_t)-1) { 182 - perror("waitpid()"); 183 - r = 1; 184 - goto rmsock; 185 - } 186 - if (!WIFEXITED(wstatus)) { 187 - fprintf(stderr, "child got terminated\n"); 188 - r = 1; 189 - goto rmsock; 190 - } 191 - r = WEXITSTATUS(wstatus); 192 - if (r) 193 - fprintf(stderr, "error: Test failed\n"); 194 - rmsock: 195 - close(rcv_sock); 196 - unlink(addr.sun_path); 197 - rmtmpdir: 198 - rmdir(tmpdir); 199 - return r; 200 - }

+2 -1

vendor/liburing/test/shutdown.c

··· 47 47 addr.sin_family = AF_INET; 48 48 addr.sin_addr.s_addr = inet_addr("127.0.0.1"); 49 49 50 - assert(!t_bind_ephemeral_port(recv_s0, &addr)); 50 + ret = t_bind_ephemeral_port(recv_s0, &addr); 51 + assert(!ret); 51 52 ret = listen(recv_s0, 128); 52 53 assert(ret != -1); 53 54

+346

vendor/liburing/test/socket-getsetsock-cmd.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: Check that {g,s}etsockopt CMD operations on sockets are 4 + * consistent. 5 + * 6 + * The tests basically do the same socket operation using regular system calls 7 + * and io_uring commands, and then compare the results. 8 + */ 9 + 10 + #include <stdio.h> 11 + #include <assert.h> 12 + #include <string.h> 13 + #include <unistd.h> 14 + #include <linux/tcp.h> 15 + 16 + #include "liburing.h" 17 + #include "helpers.h" 18 + 19 + #define USERDATA 0xff42ff 20 + #define MSG "foobarbaz" 21 + 22 + static int no_sock_opt; 23 + 24 + struct fds { 25 + int tx; 26 + int rx; 27 + }; 28 + 29 + static struct fds create_sockets(void) 30 + { 31 + struct fds retval; 32 + int fd[2]; 33 + 34 + t_create_socket_pair(fd, true); 35 + 36 + retval.tx = fd[0]; 37 + retval.rx = fd[1]; 38 + 39 + return retval; 40 + } 41 + 42 + static struct io_uring create_ring(void) 43 + { 44 + struct io_uring ring; 45 + int ring_flags = 0; 46 + int err; 47 + 48 + err = io_uring_queue_init(32, &ring, ring_flags); 49 + assert(err == 0); 50 + 51 + return ring; 52 + } 53 + 54 + static int submit_cmd_sqe(struct io_uring *ring, int32_t fd, 55 + int op, int level, int optname, 56 + void *optval, int optlen, 57 + bool async) 58 + { 59 + struct io_uring_sqe *sqe; 60 + int err; 61 + 62 + assert(fd > 0); 63 + 64 + sqe = io_uring_get_sqe(ring); 65 + assert(sqe != NULL); 66 + 67 + io_uring_prep_cmd_sock(sqe, op, fd, level, optname, optval, optlen); 68 + sqe->user_data = USERDATA; 69 + if (async) 70 + sqe->flags |= IOSQE_ASYNC; 71 + 72 + /* Submitting SQE */ 73 + err = io_uring_submit_and_wait(ring, 1); 74 + if (err != 1) 75 + fprintf(stderr, "Failure: io_uring_submit_and_wait returned %d\n", err); 76 + 77 + return err; 78 + } 79 + 80 + static int receive_cqe(struct io_uring *ring) 81 + { 82 + struct io_uring_cqe *cqe; 83 + int err; 84 + 85 + err = io_uring_wait_cqe(ring, &cqe); 86 + assert(err == 0); 87 + assert(cqe->user_data == USERDATA); 88 + 
io_uring_cqe_seen(ring, cqe); 89 + 90 + /* Return the result of the operation */ 91 + return cqe->res; 92 + } 93 + 94 + /* 95 + * Run getsock operation using SO_RCVBUF using io_uring cmd operation and 96 + * getsockopt(2) and compare the results. 97 + */ 98 + static int run_get_rcvbuf(struct io_uring *ring, struct fds *sockfds, bool async) 99 + { 100 + int sval, uval, ulen, err; 101 + unsigned int slen; 102 + 103 + /* System call values */ 104 + slen = sizeof(sval); 105 + /* io_uring values */ 106 + ulen = sizeof(uval); 107 + 108 + /* get through io_uring cmd */ 109 + err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_GETSOCKOPT, 110 + SOL_SOCKET, SO_RCVBUF, &uval, ulen, async); 111 + assert(err == 1); 112 + 113 + /* Wait for the CQE */ 114 + err = receive_cqe(ring); 115 + if (err == -EOPNOTSUPP) 116 + return T_EXIT_SKIP; 117 + if (err < 0) { 118 + fprintf(stderr, "Error received. %d\n", err); 119 + return T_EXIT_FAIL; 120 + } 121 + /* The output of CQE->res contains the length */ 122 + ulen = err; 123 + 124 + /* Executes the same operation using system call */ 125 + err = getsockopt(sockfds->rx, SOL_SOCKET, SO_RCVBUF, &sval, &slen); 126 + assert(err == 0); 127 + 128 + /* Make sure that io_uring operation returns the same value as the systemcall */ 129 + assert(ulen == slen); 130 + assert(uval == sval); 131 + 132 + return T_EXIT_PASS; 133 + } 134 + 135 + /* 136 + * Run getsock operation using SO_PEERNAME using io_uring cmd operation 137 + * and getsockopt(2) and compare the results. 
138 + */ 139 + static int run_get_peername(struct io_uring *ring, struct fds *sockfds, bool async) 140 + { 141 + struct sockaddr sval, uval = {}; 142 + socklen_t slen = sizeof(sval); 143 + socklen_t ulen = sizeof(uval); 144 + int err; 145 + 146 + /* Get values from the systemcall */ 147 + err = getsockopt(sockfds->tx, SOL_SOCKET, SO_PEERNAME, &sval, &slen); 148 + assert(err == 0); 149 + 150 + /* Getting SO_PEERNAME */ 151 + err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_GETSOCKOPT, 152 + SOL_SOCKET, SO_PEERNAME, &uval, ulen, async); 153 + assert(err == 1); 154 + 155 + /* Wait for the CQE */ 156 + err = receive_cqe(ring); 157 + if (err == -EOPNOTSUPP || err == -EINVAL) { 158 + no_sock_opt = 1; 159 + return T_EXIT_SKIP; 160 + } 161 + 162 + if (err < 0) { 163 + fprintf(stderr, "%s: Error in the CQE: %d\n", __func__, err); 164 + return T_EXIT_FAIL; 165 + } 166 + 167 + /* The length comes from cqe->res, which is returned from receive_cqe() */ 168 + ulen = err; 169 + 170 + /* Make sure that io_uring operation returns the same values as the systemcall */ 171 + assert(sval.sa_family == uval.sa_family); 172 + assert(slen == ulen); 173 + 174 + return T_EXIT_PASS; 175 + } 176 + 177 + /* 178 + * Run getsockopt tests. Basically comparing io_uring output and systemcall results 179 + */ 180 + static int run_getsockopt_test(struct io_uring *ring, struct fds *sockfds) 181 + { 182 + int err; 183 + 184 + err = run_get_peername(ring, sockfds, false); 185 + if (err) 186 + return err; 187 + 188 + err = run_get_peername(ring, sockfds, true); 189 + if (err) 190 + return err; 191 + 192 + err = run_get_rcvbuf(ring, sockfds, false); 193 + if (err) 194 + return err; 195 + 196 + return run_get_rcvbuf(ring, sockfds, true); 197 + } 198 + 199 + /* 200 + * Given a `val` value, set it in SO_REUSEPORT using io_uring cmd, and read using 201 + * getsockopt(2), and make sure they match. 
202 + */ 203 + static int run_setsockopt_reuseport(struct io_uring *ring, struct fds *sockfds, 204 + int val, bool async) 205 + { 206 + unsigned int slen, ulen; 207 + int sval, uval = val; 208 + int err; 209 + 210 + slen = sizeof(sval); 211 + ulen = sizeof(uval); 212 + 213 + /* Setting SO_REUSEPORT */ 214 + err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_SETSOCKOPT, 215 + SOL_SOCKET, SO_REUSEPORT, &uval, ulen, async); 216 + assert(err == 1); 217 + 218 + err = receive_cqe(ring); 219 + if (err == -EOPNOTSUPP) 220 + return T_EXIT_SKIP; 221 + 222 + /* Get values from the systemcall */ 223 + err = getsockopt(sockfds->rx, SOL_SOCKET, SO_REUSEPORT, &sval, &slen); 224 + assert(err == 0); 225 + 226 + /* Make sure the set using io_uring cmd matches what systemcall returns */ 227 + assert(uval == sval); 228 + assert(ulen == slen); 229 + 230 + return T_EXIT_PASS; 231 + } 232 + 233 + /* 234 + * Given a `val` value, set the TCP_USER_TIMEOUT using io_uring and read using 235 + * getsockopt(2). 
Make sure they match 236 + */ 237 + static int run_setsockopt_usertimeout(struct io_uring *ring, struct fds *sockfds, 238 + int val, bool async) 239 + { 240 + int optname = TCP_USER_TIMEOUT; 241 + int level = IPPROTO_TCP; 242 + unsigned int slen, ulen; 243 + int sval, uval, err; 244 + 245 + slen = sizeof(uval); 246 + ulen = sizeof(uval); 247 + 248 + uval = val; 249 + 250 + /* Setting timeout */ 251 + err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_SETSOCKOPT, 252 + level, optname, &uval, ulen, async); 253 + assert(err == 1); 254 + 255 + err = receive_cqe(ring); 256 + if (err == -EOPNOTSUPP) 257 + return T_EXIT_SKIP; 258 + if (err < 0) { 259 + fprintf(stderr, "%s: Got an error: %d\n", __func__, err); 260 + return T_EXIT_FAIL; 261 + } 262 + 263 + /* Get the value from the systemcall, to make sure it was set */ 264 + err = getsockopt(sockfds->rx, level, optname, &sval, &slen); 265 + assert(err == 0); 266 + assert(uval == sval); 267 + 268 + return T_EXIT_PASS; 269 + } 270 + 271 + /* Test setsockopt() for SOL_SOCKET */ 272 + static int run_setsockopt_test(struct io_uring *ring, struct fds *sockfds) 273 + { 274 + int err, i; 275 + int j; 276 + 277 + for (j = 0; j < 2; j++) { 278 + bool async = j & 1; 279 + 280 + for (i = 0; i <= 1; i++) { 281 + err = run_setsockopt_reuseport(ring, sockfds, i, async); 282 + if (err) 283 + return err; 284 + } 285 + 286 + for (i = 1; i <= 10; i++) { 287 + err = run_setsockopt_usertimeout(ring, sockfds, i, async); 288 + if (err) 289 + return err; 290 + } 291 + } 292 + 293 + return err; 294 + } 295 + 296 + /* Send data through the sockets */ 297 + static void send_data(struct fds *s) 298 + { 299 + int written_bytes; 300 + /* Send data sing the sockstruct->send */ 301 + written_bytes = write(s->tx, MSG, strlen(MSG)); 302 + assert(written_bytes == strlen(MSG)); 303 + } 304 + 305 + int main(int argc, char *argv[]) 306 + { 307 + struct fds sockfds; 308 + struct io_uring ring; 309 + int err; 310 + 311 + if (argc > 1) 312 + return 
T_EXIT_SKIP; 313 + 314 + /* Simply io_uring ring creation */ 315 + ring = create_ring(); 316 + 317 + /* Create sockets */ 318 + sockfds = create_sockets(); 319 + 320 + send_data(&sockfds); 321 + 322 + err = run_getsockopt_test(&ring, &sockfds); 323 + if (err) { 324 + if (err == T_EXIT_SKIP) { 325 + fprintf(stderr, "Skipping tests.\n"); 326 + return T_EXIT_SKIP; 327 + } 328 + fprintf(stderr, "Failed to run test: %d\n", err); 329 + return err; 330 + } 331 + if (no_sock_opt) 332 + return T_EXIT_SKIP; 333 + 334 + err = run_setsockopt_test(&ring, &sockfds); 335 + if (err) { 336 + if (err == T_EXIT_SKIP) { 337 + fprintf(stderr, "Skipping tests.\n"); 338 + return T_EXIT_SKIP; 339 + } 340 + fprintf(stderr, "Failed to run test: %d\n", err); 341 + return err; 342 + } 343 + 344 + io_uring_queue_exit(&ring); 345 + return err; 346 + }

+237

vendor/liburing/test/socket-io-cmd.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Check that CMD operations on sockets are consistent. 4 + */ 5 + #include <stdio.h> 6 + #include <stdlib.h> 7 + #include <stdint.h> 8 + #include <assert.h> 9 + #include <string.h> 10 + #include <unistd.h> 11 + #include <linux/sockios.h> 12 + #include <sys/ioctl.h> 13 + 14 + #include "liburing.h" 15 + #include "helpers.h" 16 + 17 + #define USERDATA 0x1234 18 + #define MSG "foobarbaz" 19 + 20 + static int no_io_cmd; 21 + 22 + struct fds { 23 + int tx; 24 + int rx; 25 + }; 26 + 27 + /* Create 2 sockets (tx, rx) given the socket type */ 28 + static struct fds create_sockets(bool stream) 29 + { 30 + struct fds retval; 31 + int fd[2]; 32 + 33 + t_create_socket_pair(fd, stream); 34 + 35 + retval.tx = fd[0]; 36 + retval.rx = fd[1]; 37 + 38 + return retval; 39 + } 40 + 41 + static int create_sqe_and_submit(struct io_uring *ring, int32_t fd, int op) 42 + { 43 + struct io_uring_sqe *sqe; 44 + int ret; 45 + 46 + assert(fd > 0); 47 + sqe = io_uring_get_sqe(ring); 48 + assert(sqe != NULL); 49 + 50 + io_uring_prep_cmd_sock(sqe, op, fd, 0, 0, NULL, 0); 51 + sqe->user_data = USERDATA; 52 + 53 + /* Submitting SQE */ 54 + ret = io_uring_submit_and_wait(ring, 1); 55 + if (ret <= 0) 56 + return ret; 57 + 58 + return 0; 59 + } 60 + 61 + static int receive_cqe(struct io_uring *ring) 62 + { 63 + struct io_uring_cqe *cqe; 64 + int err; 65 + 66 + err = io_uring_wait_cqe(ring, &cqe); 67 + assert(err == 0); 68 + assert(cqe->user_data == USERDATA); 69 + err = cqe->res; 70 + io_uring_cqe_seen(ring, cqe); 71 + 72 + /* Return the result of the operation */ 73 + return err; 74 + } 75 + 76 + static ssize_t send_data(struct fds *s, char *str) 77 + { 78 + size_t written_bytes; 79 + 80 + written_bytes = write(s->tx, str, strlen(str)); 81 + assert(written_bytes == strlen(MSG)); 82 + 83 + return written_bytes; 84 + } 85 + 86 + static int run_test(bool stream) 87 + { 88 + struct fds sockfds; 89 + ssize_t bytes_in, bytes_out; 90 + struct io_uring 
ring; 91 + size_t written_bytes; 92 + int error; 93 + 94 + /* Create three sockets */ 95 + sockfds = create_sockets(stream); 96 + assert(sockfds.tx > 0); 97 + assert(sockfds.rx > 0); 98 + /* Send data sing the sockfds->send */ 99 + written_bytes = send_data(&sockfds, MSG); 100 + 101 + /* Simply io_uring ring creation */ 102 + error = t_create_ring(1, &ring, 0); 103 + if (error == T_SETUP_SKIP) 104 + return error; 105 + else if (error != T_SETUP_OK) 106 + return T_EXIT_FAIL; 107 + 108 + error = create_sqe_and_submit(&ring, sockfds.rx, 109 + SOCKET_URING_OP_SIOCINQ); 110 + if (error) 111 + return T_EXIT_FAIL; 112 + bytes_in = receive_cqe(&ring); 113 + if (bytes_in < 0) { 114 + if (bytes_in == -EINVAL || bytes_in == -EOPNOTSUPP) { 115 + no_io_cmd = 1; 116 + return T_EXIT_SKIP; 117 + } 118 + fprintf(stderr, "Bad return value %ld\n", (long) bytes_in); 119 + return T_EXIT_FAIL; 120 + } 121 + 122 + error = create_sqe_and_submit(&ring, sockfds.tx, 123 + SOCKET_URING_OP_SIOCOUTQ); 124 + if (error) 125 + return T_EXIT_FAIL; 126 + 127 + bytes_out = receive_cqe(&ring); 128 + if (bytes_in == -ENOTSUP || bytes_out == -ENOTSUP) { 129 + fprintf(stderr, "Skipping tests. 
-ENOTSUP returned\n"); 130 + return T_EXIT_SKIP; 131 + } 132 + 133 + /* 134 + * Assert the number of written bytes are either in the socket buffer 135 + * or on the receive side 136 + */ 137 + if (bytes_in + bytes_out != written_bytes) { 138 + fprintf(stderr, "values does not match: %zu+%zu != %zu\n", 139 + bytes_in, bytes_out, written_bytes); 140 + return T_EXIT_FAIL; 141 + } 142 + 143 + io_uring_queue_exit(&ring); 144 + 145 + return T_EXIT_PASS; 146 + } 147 + 148 + /* 149 + * Make sure that siocoutq and siocinq returns the same value 150 + * using ioctl(2) and uring commands for raw sockets 151 + */ 152 + static int run_test_raw(void) 153 + { 154 + int ioctl_siocoutq, ioctl_siocinq; 155 + int uring_siocoutq, uring_siocinq; 156 + struct io_uring ring; 157 + int retry = 0, sock, error; 158 + 159 + sock = socket(PF_INET, SOCK_RAW, IPPROTO_TCP); 160 + if (sock == -1) { 161 + /* You need root to create raw socket */ 162 + perror("Not able to create a raw socket"); 163 + return T_EXIT_SKIP; 164 + } 165 + 166 + /* Get the same operation using uring cmd */ 167 + error = t_create_ring(1, &ring, 0); 168 + if (error == T_SETUP_SKIP) 169 + return error; 170 + else if (error != T_SETUP_OK) 171 + return T_EXIT_FAIL; 172 + 173 + again: 174 + /* Simple SIOCOUTQ using ioctl */ 175 + error = ioctl(sock, SIOCOUTQ, &ioctl_siocoutq); 176 + if (error < 0) { 177 + fprintf(stderr, "Failed to run ioctl(SIOCOUTQ): %d\n", error); 178 + return T_EXIT_FAIL; 179 + } 180 + 181 + error = ioctl(sock, SIOCINQ, &ioctl_siocinq); 182 + if (error < 0) { 183 + fprintf(stderr, "Failed to run ioctl(SIOCINQ): %d\n", error); 184 + return T_EXIT_FAIL; 185 + } 186 + 187 + create_sqe_and_submit(&ring, sock, SOCKET_URING_OP_SIOCOUTQ); 188 + uring_siocoutq = receive_cqe(&ring); 189 + 190 + create_sqe_and_submit(&ring, sock, SOCKET_URING_OP_SIOCINQ); 191 + uring_siocinq = receive_cqe(&ring); 192 + 193 + /* Compare that both values (ioctl and uring CMD) should be similar */ 194 + if (uring_siocoutq != 
ioctl_siocoutq) { 195 + if (!retry) { 196 + retry = 1; 197 + goto again; 198 + } 199 + fprintf(stderr, "values does not match: %d != %d\n", 200 + uring_siocoutq, ioctl_siocoutq); 201 + return T_EXIT_FAIL; 202 + } 203 + if (uring_siocinq != ioctl_siocinq) { 204 + if (!retry) { 205 + retry = 1; 206 + goto again; 207 + } 208 + fprintf(stderr, "values does not match: %d != %d\n", 209 + uring_siocinq, ioctl_siocinq); 210 + return T_EXIT_FAIL; 211 + } 212 + 213 + return T_EXIT_PASS; 214 + } 215 + 216 + int main(int argc, char *argv[]) 217 + { 218 + int err; 219 + 220 + if (argc > 1) 221 + return 0; 222 + 223 + /* Test SOCK_STREAM */ 224 + err = run_test(true); 225 + if (err) 226 + return err; 227 + if (no_io_cmd) 228 + return T_EXIT_SKIP; 229 + 230 + /* Test SOCK_DGRAM */ 231 + err = run_test(false); 232 + if (err) 233 + return err; 234 + 235 + /* Test raw sockets */ 236 + return run_test_raw(); 237 + }

+2 -1

vendor/liburing/test/socket-rw-eagain.c

··· 42 42 43 43 addr.sin_family = AF_INET; 44 44 addr.sin_addr.s_addr = inet_addr("127.0.0.1"); 45 - assert(!t_bind_ephemeral_port(recv_s0, &addr)); 45 + ret = t_bind_ephemeral_port(recv_s0, &addr); 46 + assert(!ret); 46 47 ret = listen(recv_s0, 128); 47 48 assert(ret != -1); 48 49

+2 -1

vendor/liburing/test/socket-rw-offset.c

··· 44 44 45 45 addr.sin_family = AF_INET; 46 46 addr.sin_addr.s_addr = inet_addr("127.0.0.1"); 47 - assert(!t_bind_ephemeral_port(recv_s0, &addr)); 47 + ret = t_bind_ephemeral_port(recv_s0, &addr); 48 + assert(!ret); 48 49 ret = listen(recv_s0, 128); 49 50 assert(ret != -1); 50 51

+2 -1

vendor/liburing/test/socket-rw.c

··· 44 44 45 45 addr.sin_family = AF_INET; 46 46 addr.sin_addr.s_addr = inet_addr("127.0.0.1"); 47 - assert(!t_bind_ephemeral_port(recv_s0, &addr)); 47 + ret = t_bind_ephemeral_port(recv_s0, &addr); 48 + assert(!ret); 48 49 ret = listen(recv_s0, 128); 49 50 assert(ret != -1); 50 51

-168

vendor/liburing/test/sqpoll-cancel-hang.c

··· 1 - /* SPDX-License-Identifier: MIT */ 2 - #include <fcntl.h> 3 - #include <signal.h> 4 - #include <stdint.h> 5 - #include <stdlib.h> 6 - #include <string.h> 7 - #include <sys/mman.h> 8 - #include <sys/wait.h> 9 - #include <time.h> 10 - #include <unistd.h> 11 - #include "liburing.h" 12 - #include "helpers.h" 13 - #include "../src/syscall.h" 14 - 15 - /* 16 - * This syzbot test is known broken on some archs, just allow the ones that 17 - * are regularly tested. 18 - */ 19 - #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ 20 - defined(__aarch64__) 21 - 22 - static uint64_t current_time_ms(void) 23 - { 24 - struct timespec ts; 25 - if (clock_gettime(CLOCK_MONOTONIC, &ts)) 26 - exit(1); 27 - return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; 28 - } 29 - 30 - #define SIZEOF_IO_URING_SQE 64 31 - #define SIZEOF_IO_URING_CQE 16 32 - #define SQ_TAIL_OFFSET 64 33 - #define SQ_RING_MASK_OFFSET 256 34 - #define SQ_RING_ENTRIES_OFFSET 264 35 - #define CQ_RING_ENTRIES_OFFSET 268 36 - #define CQ_CQES_OFFSET 320 37 - 38 - #define IORING_OFF_SQES 0x10000000ULL 39 - 40 - static void kill_and_wait(int pid, int* status) 41 - { 42 - kill(-pid, SIGKILL); 43 - kill(pid, SIGKILL); 44 - while (waitpid(-1, status, __WALL) != pid) { 45 - } 46 - } 47 - 48 - #define WAIT_FLAGS __WALL 49 - 50 - static uint64_t r[3] = {0xffffffffffffffff, 0x0, 0x0}; 51 - 52 - static long syz_io_uring_setup(volatile long a0, volatile long a1, 53 - volatile long a2, volatile long a3, volatile long a4, volatile long 54 - a5) 55 - { 56 - uint32_t entries = (uint32_t)a0; 57 - struct io_uring_params* setup_params = (struct io_uring_params*)a1; 58 - void* vma1 = (void*)a2; 59 - void* vma2 = (void*)a3; 60 - void** ring_ptr_out = (void**)a4; 61 - void** sqes_ptr_out = (void**)a5; 62 - uint32_t fd_io_uring = __sys_io_uring_setup(entries, setup_params); 63 - uint32_t sq_ring_sz = setup_params->sq_off.array + 64 - setup_params->sq_entries * sizeof(uint32_t); 65 - uint32_t 
cq_ring_sz = setup_params->cq_off.cqes + 66 - setup_params->cq_entries * SIZEOF_IO_URING_CQE; 67 - uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz; 68 - *ring_ptr_out = mmap(vma1, ring_sz, PROT_READ | PROT_WRITE, 69 - MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, 70 - IORING_OFF_SQ_RING); 71 - uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE; 72 - *sqes_ptr_out = mmap(vma2, sqes_sz, PROT_READ | PROT_WRITE, 73 - MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQES); 74 - return fd_io_uring; 75 - } 76 - 77 - static long syz_io_uring_submit(volatile long a0, volatile long a1, 78 - volatile long a2, volatile long a3) 79 - { 80 - char* ring_ptr = (char*)a0; 81 - char* sqes_ptr = (char*)a1; 82 - char* sqe = (char*)a2; 83 - uint32_t sqes_index = (uint32_t)a3; 84 - uint32_t sq_ring_entries = *(uint32_t*)(ring_ptr + SQ_RING_ENTRIES_OFFSET); 85 - uint32_t cq_ring_entries = *(uint32_t*)(ring_ptr + CQ_RING_ENTRIES_OFFSET); 86 - uint32_t sq_array_off = (CQ_CQES_OFFSET + cq_ring_entries * 87 - SIZEOF_IO_URING_CQE + 63) & ~63; 88 - if (sq_ring_entries) 89 - sqes_index %= sq_ring_entries; 90 - char* sqe_dest = sqes_ptr + sqes_index * SIZEOF_IO_URING_SQE; 91 - memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE); 92 - uint32_t sq_ring_mask = *(uint32_t*)(ring_ptr + SQ_RING_MASK_OFFSET); 93 - uint32_t* sq_tail_ptr = (uint32_t*)(ring_ptr + SQ_TAIL_OFFSET); 94 - uint32_t sq_tail = *sq_tail_ptr & sq_ring_mask; 95 - uint32_t sq_tail_next = *sq_tail_ptr + 1; 96 - uint32_t* sq_array = (uint32_t*)(ring_ptr + sq_array_off); 97 - *(sq_array + sq_tail) = sqes_index; 98 - __atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE); 99 - return 0; 100 - } 101 - 102 - 103 - static void trigger_bug(void) 104 - { 105 - intptr_t res = 0; 106 - *(uint32_t*)0x20000204 = 0; 107 - *(uint32_t*)0x20000208 = 2; 108 - *(uint32_t*)0x2000020c = 0; 109 - *(uint32_t*)0x20000210 = 0; 110 - *(uint32_t*)0x20000218 = -1; 111 - memset((void*)0x2000021c, 0, 12); 112 
- res = -1; 113 - res = syz_io_uring_setup(0x7987, 0x20000200, 0x20400000, 0x20ffd000, 0x200000c0, 0x200001c0); 114 - if (res != -1) { 115 - r[0] = res; 116 - r[1] = *(uint64_t*)0x200000c0; 117 - r[2] = *(uint64_t*)0x200001c0; 118 - } 119 - *(uint8_t*)0x20000180 = 0xb; 120 - *(uint8_t*)0x20000181 = 1; 121 - *(uint16_t*)0x20000182 = 0; 122 - *(uint32_t*)0x20000184 = 0; 123 - *(uint64_t*)0x20000188 = 4; 124 - *(uint64_t*)0x20000190 = 0x20000140; 125 - *(uint64_t*)0x20000140 = 0x77359400; 126 - *(uint64_t*)0x20000148 = 0; 127 - *(uint32_t*)0x20000198 = 1; 128 - *(uint32_t*)0x2000019c = 0; 129 - *(uint64_t*)0x200001a0 = 0; 130 - *(uint16_t*)0x200001a8 = 0; 131 - *(uint16_t*)0x200001aa = 0; 132 - memset((void*)0x200001ac, 0, 20); 133 - syz_io_uring_submit(r[1], r[2], 0x20000180, 1); 134 - *(uint32_t*)0x20000544 = 0; 135 - *(uint32_t*)0x20000548 = 0x36; 136 - *(uint32_t*)0x2000054c = 0; 137 - *(uint32_t*)0x20000550 = 0; 138 - *(uint32_t*)0x20000558 = r[0]; 139 - memset((void*)0x2000055c, 0, 12); 140 - 141 - } 142 - int main(void) 143 - { 144 - mmap((void *)0x20000000ul, 0x1000000ul, 7ul, MAP_ANON|MAP_PRIVATE, -1, 0ul); 145 - int pid = fork(); 146 - if (pid < 0) 147 - exit(1); 148 - if (pid == 0) { 149 - trigger_bug(); 150 - exit(0); 151 - } 152 - int status = 0; 153 - uint64_t start = current_time_ms(); 154 - for (;;) { 155 - if (current_time_ms() - start < 1000) { 156 - continue; 157 - } 158 - kill_and_wait(pid, &status); 159 - break; 160 - } 161 - return 0; 162 - } 163 - #else 164 - int main(void) 165 - { 166 - return T_EXIT_SKIP; 167 - } 168 - #endif

+132

vendor/liburing/test/sqpoll-exec.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: Check that closing a file with SQPOLL has it immediately closed 4 + * upon receiving the CQE for the close. The 6.9 kernel had a bug 5 + * where SQPOLL would not run kernel wide task_work when running the 6 + * private task_work, which would defer the close if this was the 7 + * final close of the file. 8 + */ 9 + #include <errno.h> 10 + #include <stdio.h> 11 + #include <unistd.h> 12 + #include <stdlib.h> 13 + #include <string.h> 14 + #include <fcntl.h> 15 + #include <sys/time.h> 16 + #include <sys/wait.h> 17 + #include <sys/types.h> 18 + #include <sys/stat.h> 19 + 20 + #include "helpers.h" 21 + #include "liburing.h" 22 + 23 + static int fill_exec_target(char *dst, char *path) 24 + { 25 + struct stat sb; 26 + 27 + /* 28 + * Should either be ./exec-target.t or test/exec-target.t 29 + */ 30 + sprintf(dst, "%s", path); 31 + return stat(dst, &sb); 32 + } 33 + 34 + static int test_exec(struct io_uring *ring, char * const argv[]) 35 + { 36 + char prog_path[PATH_MAX]; 37 + struct io_uring_sqe *sqe; 38 + struct io_uring_cqe *cqe; 39 + int ret, wstatus, fd; 40 + pid_t p; 41 + 42 + if (fill_exec_target(prog_path, "./exec-target.t") && 43 + fill_exec_target(prog_path, "test/exec-target.t")) { 44 + fprintf(stdout, "Can't find exec-target, skipping\n"); 45 + return 0; 46 + } 47 + 48 + sqe = io_uring_get_sqe(ring); 49 + io_uring_prep_openat(sqe, AT_FDCWD, prog_path, O_WRONLY, 0); 50 + sqe->user_data = 0; 51 + 52 + io_uring_submit(ring); 53 + 54 + ret = io_uring_wait_cqe(ring, &cqe); 55 + if (ret) { 56 + fprintf(stderr, "wait cqe %d\n", ret); 57 + return 1; 58 + } 59 + if (cqe->res < 0) { 60 + fprintf(stderr, "open: %d\n", cqe->res); 61 + return 1; 62 + } 63 + fd = cqe->res; 64 + io_uring_cqe_seen(ring, cqe); 65 + 66 + sqe = io_uring_get_sqe(ring); 67 + io_uring_prep_close(sqe, fd); 68 + sqe->user_data = 1; 69 + 70 + io_uring_submit(ring); 71 + 72 + ret = io_uring_wait_cqe(ring, &cqe); 73 + if (ret) { 74 + 
fprintf(stderr, "wait cqe %d\n", ret); 75 + return 1; 76 + } 77 + if (cqe->res < 0) { 78 + fprintf(stderr, "close: %d\n", cqe->res); 79 + return 1; 80 + } 81 + io_uring_cqe_seen(ring, cqe); 82 + 83 + p = fork(); 84 + if (p == -1) { 85 + fprintf(stderr, "fork() failed\n"); 86 + return 1; 87 + } 88 + 89 + if (p == 0) { 90 + /* file should be closed, try exec'ing it */ 91 + ret = execve(prog_path, argv, NULL); 92 + if (ret) { 93 + fprintf(stderr, "exec failed: %s\n", strerror(errno)); 94 + exit(1); 95 + } 96 + } 97 + 98 + if (waitpid(p, &wstatus, 0) == (pid_t)-1) { 99 + perror("waitpid()"); 100 + return 1; 101 + } 102 + if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus)) 103 + return 1; 104 + 105 + return 0; 106 + } 107 + 108 + int main(int argc, char * const argv[]) 109 + { 110 + struct io_uring_params p = { .flags = IORING_SETUP_SQPOLL, }; 111 + struct io_uring ring; 112 + int ret, i; 113 + 114 + if (argc > 1) 115 + return T_EXIT_SKIP; 116 + 117 + ret = t_create_ring_params(8, &ring, &p); 118 + if (ret == T_SETUP_SKIP) 119 + return T_EXIT_SKIP; 120 + else if (ret != T_SETUP_OK) 121 + return T_EXIT_FAIL; 122 + 123 + for (i = 0; i < 20; i++) { 124 + ret = test_exec(&ring, argv); 125 + if (ret) { 126 + fprintf(stderr, "test_exec failed\n"); 127 + return ret; 128 + } 129 + } 130 + 131 + return T_EXIT_PASS; 132 + }

+1 -1

vendor/liburing/test/timeout.c

··· 1233 1233 exit(1); 1234 1234 } 1235 1235 1236 - /* trigger full cancellation */ 1236 + /* trigger full cancelation */ 1237 1237 ret = execl(prog_path, prog_path, NULL); 1238 1238 if (ret) { 1239 1239 fprintf(stderr, "exec failed %i\n", errno);

+186

vendor/liburing/test/truncate.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: run various truncate tests 4 + * 5 + */ 6 + #include <errno.h> 7 + #include <stdio.h> 8 + #include <unistd.h> 9 + #include <stdlib.h> 10 + #include <string.h> 11 + #include <fcntl.h> 12 + #include <sys/stat.h> 13 + #include <sys/ioctl.h> 14 + 15 + #include "liburing.h" 16 + #include "helpers.h" 17 + 18 + #define TWO_GIG_SIZE ((loff_t)2 * 1024 * 1024 * 1024) 19 + #define ONE_GIG_SIZE ((loff_t)1024 * 1024 * 1024) 20 + #define HALF_GIG_SIZE ((loff_t)512 * 1024 * 1024) 21 + 22 + static int test_truncate(struct io_uring *ring, int fd) 23 + { 24 + struct io_uring_cqe *cqe; 25 + struct io_uring_sqe *sqe; 26 + int ret = -1; 27 + 28 + sqe = io_uring_get_sqe(ring); 29 + if (!sqe) { 30 + fprintf(stderr, "get sqe failed\n"); 31 + return T_EXIT_FAIL; 32 + } 33 + 34 + memset(sqe, 0, sizeof(*sqe)); 35 + 36 + io_uring_prep_rw(IORING_OP_FTRUNCATE, sqe, fd, "fail", 0, 4); 37 + 38 + ret = io_uring_submit(ring); 39 + if (ret <= 0) { 40 + fprintf(stderr, "sqe submit failed: %d\n", ret); 41 + return T_EXIT_FAIL; 42 + } 43 + 44 + ret = io_uring_wait_cqe(ring, &cqe); 45 + if (ret < 0) { 46 + fprintf(stderr, "wait completion %d\n", ret); 47 + return T_EXIT_FAIL; 48 + } 49 + ret = cqe->res; 50 + io_uring_cqe_seen(ring, cqe); 51 + if (ret == -EINVAL) 52 + return T_EXIT_PASS; 53 + 54 + fprintf(stderr, "unexpected truncate res %d\n", ret); 55 + return T_EXIT_FAIL; 56 + } 57 + 58 + static int test_ftruncate(struct io_uring *ring, int fd, loff_t len) 59 + { 60 + struct io_uring_cqe *cqe; 61 + struct io_uring_sqe *sqe; 62 + int ret; 63 + 64 + sqe = io_uring_get_sqe(ring); 65 + if (!sqe) { 66 + fprintf(stderr, "get sqe failed\n"); 67 + goto err; 68 + } 69 + 70 + memset(sqe, 0, sizeof(*sqe)); 71 + 72 + io_uring_prep_ftruncate(sqe, fd, len); 73 + 74 + ret = io_uring_submit(ring); 75 + if (ret <= 0) { 76 + fprintf(stderr, "sqe submit failed: %d\n", ret); 77 + goto err; 78 + } 79 + 80 + ret = io_uring_wait_cqe(ring, &cqe); 81 + if 
(ret < 0) { 82 + fprintf(stderr, "wait completion %d\n", ret); 83 + goto err; 84 + } 85 + ret = cqe->res; 86 + io_uring_cqe_seen(ring, cqe); 87 + return ret; 88 + err: 89 + return 1; 90 + } 91 + 92 + static int get_file_size(int fd, loff_t *size) 93 + { 94 + struct stat st; 95 + 96 + if (fstat(fd, &st) < 0) { 97 + perror("fstat"); 98 + return -1; 99 + } 100 + if (S_ISREG(st.st_mode)) { 101 + *size = st.st_size; 102 + return 0; 103 + } else if (S_ISBLK(st.st_mode)) { 104 + unsigned long long bytes; 105 + 106 + if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) { 107 + perror("ioctl"); 108 + return -1; 109 + } 110 + 111 + *size = bytes; 112 + return 0; 113 + } 114 + 115 + return -1; 116 + } 117 + 118 + int main(int argc, char *argv[]) 119 + { 120 + struct io_uring ring; 121 + char path[32] = ".truncate.XXXXXX"; 122 + int ret; 123 + int fd; 124 + int i; 125 + loff_t size; 126 + loff_t test_sizes[3]; 127 + 128 + if (argc > 1) 129 + return T_EXIT_SKIP; 130 + 131 + ret = io_uring_queue_init(1, &ring, 0); 132 + if (ret) { 133 + fprintf(stderr, "ring setup failed: %d\n", ret); 134 + return T_EXIT_FAIL; 135 + } 136 + 137 + fd = mkostemp(path, O_WRONLY | O_CREAT | O_TRUNC); 138 + if (fd < 0) { 139 + perror("mkostemp"); 140 + return T_EXIT_FAIL; 141 + } 142 + 143 + test_sizes[0] = TWO_GIG_SIZE; 144 + test_sizes[1] = ONE_GIG_SIZE; 145 + test_sizes[2] = HALF_GIG_SIZE; 146 + 147 + for (i = 0; i < 3; i++) { 148 + ret = test_ftruncate(&ring, fd, test_sizes[i]); 149 + if (ret < 0) { 150 + if (ret == -EBADF || ret == -EINVAL) { 151 + if (i == 0) { 152 + fprintf(stdout, "Ftruncate not supported, skipping\n"); 153 + ret = T_EXIT_SKIP; 154 + goto out; 155 + } 156 + goto err; 157 + } 158 + fprintf(stderr, "ftruncate: %s\n", strerror(-ret)); 159 + goto err; 160 + } else if (ret) { 161 + fprintf(stderr, "unexpected cqe->res %d\n", ret); 162 + goto err; 163 + } 164 + if (get_file_size(fd, &size)) 165 + goto err; 166 + if (size != test_sizes[i]) { 167 + fprintf(stderr, "fail %d size=%llu, %llu\n", 
i, 168 + (unsigned long long) size, 169 + (unsigned long long) test_sizes[i]); 170 + goto err; 171 + } 172 + } 173 + 174 + ret = test_truncate(&ring, fd); 175 + if (ret != T_EXIT_PASS) 176 + goto err; 177 + 178 + out: 179 + unlink(path); 180 + close(fd); 181 + return T_EXIT_PASS; 182 + err: 183 + unlink(path); 184 + close(fd); 185 + return T_EXIT_FAIL; 186 + }

+2 -2

vendor/liburing/test/version.c

··· 8 8 9 9 int main(int argc, char *argv[]) 10 10 { 11 - if (!IO_URING_CHECK_VERSION(io_uring_major_version(), io_uring_minor_version())) 11 + if (IO_URING_CHECK_VERSION(io_uring_major_version(), io_uring_minor_version())) 12 12 return T_EXIT_FAIL; 13 13 14 14 if (io_uring_major_version() != IO_URING_VERSION_MAJOR) ··· 17 17 if (io_uring_minor_version() != IO_URING_VERSION_MINOR) 18 18 return T_EXIT_FAIL; 19 19 20 - #if !IO_URING_CHECK_VERSION(IO_URING_VERSION_MAJOR, IO_URING_VERSION_MINOR) 20 + #if IO_URING_CHECK_VERSION(IO_URING_VERSION_MAJOR, IO_URING_VERSION_MINOR) 21 21 return T_EXIT_FAIL; 22 22 #endif 23 23

+373

vendor/liburing/test/waitid.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: test waitid functionality 4 + */ 5 + #include <stdio.h> 6 + #include <stdlib.h> 7 + #include <unistd.h> 8 + #include <string.h> 9 + 10 + #include "liburing.h" 11 + #include "helpers.h" 12 + 13 + static bool no_waitid; 14 + 15 + static void child(long usleep_time) 16 + { 17 + if (usleep_time) 18 + usleep(usleep_time); 19 + exit(0); 20 + } 21 + 22 + /* 23 + * Test linked timeout with child not exiting in time 24 + */ 25 + static int test_noexit(struct io_uring *ring) 26 + { 27 + struct io_uring_sqe *sqe; 28 + struct io_uring_cqe *cqe; 29 + struct __kernel_timespec ts; 30 + siginfo_t si; 31 + pid_t pid; 32 + int ret, i; 33 + 34 + pid = fork(); 35 + if (!pid) { 36 + child(200000); 37 + exit(0); 38 + } 39 + 40 + sqe = io_uring_get_sqe(ring); 41 + io_uring_prep_waitid(sqe, P_PID, pid, &si, WEXITED, 0); 42 + sqe->flags |= IOSQE_IO_LINK; 43 + sqe->user_data = 1; 44 + 45 + ts.tv_sec = 0; 46 + ts.tv_nsec = 100 * 1000 * 1000ULL; 47 + sqe = io_uring_get_sqe(ring); 48 + io_uring_prep_link_timeout(sqe, &ts, 0); 49 + sqe->user_data = 2; 50 + 51 + io_uring_submit(ring); 52 + 53 + for (i = 0; i < 2; i++) { 54 + ret = io_uring_wait_cqe(ring, &cqe); 55 + if (ret) { 56 + fprintf(stderr, "cqe wait: %d\n", ret); 57 + return T_EXIT_FAIL; 58 + } 59 + if (cqe->user_data == 2 && cqe->res != 1) { 60 + fprintf(stderr, "timeout res: %d\n", cqe->res); 61 + return T_EXIT_FAIL; 62 + } 63 + if (cqe->user_data == 1 && cqe->res != -ECANCELED) { 64 + fprintf(stderr, "waitid res: %d\n", cqe->res); 65 + return T_EXIT_FAIL; 66 + } 67 + io_uring_cqe_seen(ring, cqe); 68 + } 69 + 70 + return T_EXIT_PASS; 71 + } 72 + 73 + /* 74 + * Test one child exiting, but not the one we were looking for 75 + */ 76 + static int test_double(struct io_uring *ring) 77 + { 78 + struct io_uring_sqe *sqe; 79 + struct io_uring_cqe *cqe; 80 + siginfo_t si; 81 + pid_t p1, p2; 82 + int ret; 83 + 84 + /* p1 will exit shortly */ 85 + p1 = fork(); 86 + if (!p1) { 87 
+ child(100000); 88 + exit(0); 89 + } 90 + 91 + /* p2 will linger */ 92 + p2 = fork(); 93 + if (!p2) { 94 + child(200000); 95 + exit(0); 96 + } 97 + 98 + sqe = io_uring_get_sqe(ring); 99 + io_uring_prep_waitid(sqe, P_PID, p2, &si, WEXITED, 0); 100 + 101 + io_uring_submit(ring); 102 + 103 + ret = io_uring_wait_cqe(ring, &cqe); 104 + if (ret) { 105 + fprintf(stderr, "cqe wait: %d\n", ret); 106 + return T_EXIT_FAIL; 107 + } 108 + 109 + if (cqe->res < 0) { 110 + fprintf(stderr, "cqe res: %d\n", cqe->res); 111 + return T_EXIT_FAIL; 112 + } 113 + if (si.si_pid != p2) { 114 + fprintf(stderr, "expected pid %d, got %d\n", p2, si.si_pid); 115 + return T_EXIT_FAIL; 116 + } 117 + 118 + io_uring_cqe_seen(ring, cqe); 119 + return T_EXIT_PASS; 120 + } 121 + 122 + /* 123 + * Test reaping of an already exited task 124 + */ 125 + static int test_ready(struct io_uring *ring) 126 + { 127 + struct io_uring_sqe *sqe; 128 + struct io_uring_cqe *cqe; 129 + siginfo_t si; 130 + pid_t pid; 131 + int ret; 132 + 133 + pid = fork(); 134 + if (!pid) { 135 + child(0); 136 + exit(0); 137 + } 138 + 139 + sqe = io_uring_get_sqe(ring); 140 + io_uring_prep_waitid(sqe, P_PID, pid, &si, WEXITED, 0); 141 + 142 + io_uring_submit(ring); 143 + 144 + ret = io_uring_wait_cqe(ring, &cqe); 145 + if (ret) { 146 + fprintf(stderr, "cqe wait: %d\n", ret); 147 + return T_EXIT_FAIL; 148 + } 149 + 150 + if (cqe->res < 0) { 151 + fprintf(stderr, "cqe res: %d\n", cqe->res); 152 + return T_EXIT_FAIL; 153 + } 154 + if (si.si_pid != pid) { 155 + fprintf(stderr, "expected pid %d, got %d\n", pid, si.si_pid); 156 + return T_EXIT_FAIL; 157 + } 158 + 159 + io_uring_cqe_seen(ring, cqe); 160 + return T_EXIT_PASS; 161 + } 162 + 163 + /* 164 + * Test cancelation of pending waitid 165 + */ 166 + static int test_cancel(struct io_uring *ring) 167 + { 168 + struct io_uring_sqe *sqe; 169 + struct io_uring_cqe *cqe; 170 + int ret, i; 171 + pid_t pid; 172 + 173 + pid = fork(); 174 + if (!pid) { 175 + child(20000); 176 + exit(0); 177 + } 
178 + 179 + sqe = io_uring_get_sqe(ring); 180 + io_uring_prep_waitid(sqe, P_PID, pid, NULL, WEXITED, 0); 181 + sqe->user_data = 1; 182 + 183 + io_uring_submit(ring); 184 + 185 + sqe = io_uring_get_sqe(ring); 186 + io_uring_prep_cancel64(sqe, 1, 0); 187 + sqe->user_data = 2; 188 + 189 + io_uring_submit(ring); 190 + 191 + for (i = 0; i < 2; i++) { 192 + ret = io_uring_wait_cqe(ring, &cqe); 193 + if (ret) { 194 + fprintf(stderr, "cqe wait: %d\n", ret); 195 + return T_EXIT_FAIL; 196 + } 197 + if (cqe->user_data == 1 && cqe->res != -ECANCELED) { 198 + fprintf(stderr, "cqe res: %d\n", cqe->res); 199 + return T_EXIT_FAIL; 200 + } 201 + if (cqe->user_data == 2 && cqe->res != 1) { 202 + fprintf(stderr, "cqe res: %d\n", cqe->res); 203 + return T_EXIT_FAIL; 204 + } 205 + io_uring_cqe_seen(ring, cqe); 206 + } 207 + 208 + return T_EXIT_PASS; 209 + } 210 + 211 + /* 212 + * Test cancelation of pending waitid, with expected races that either 213 + * waitid trigger or cancelation will win. 214 + */ 215 + static int test_cancel_race(struct io_uring *ring, int async) 216 + { 217 + struct io_uring_sqe *sqe; 218 + struct io_uring_cqe *cqe; 219 + int ret, i; 220 + pid_t pid; 221 + 222 + for (i = 0; i < 10; i++) { 223 + pid = fork(); 224 + if (!pid) { 225 + child(getpid() & 1); 226 + exit(0); 227 + } 228 + } 229 + 230 + sqe = io_uring_get_sqe(ring); 231 + io_uring_prep_waitid(sqe, P_ALL, -1, NULL, WEXITED, 0); 232 + if (async) 233 + sqe->flags |= IOSQE_ASYNC; 234 + sqe->user_data = 1; 235 + 236 + io_uring_submit(ring); 237 + 238 + sqe = io_uring_get_sqe(ring); 239 + io_uring_prep_cancel64(sqe, 1, 0); 240 + sqe->user_data = 2; 241 + 242 + usleep(1); 243 + 244 + io_uring_submit(ring); 245 + 246 + for (i = 0; i < 2; i++) { 247 + ret = io_uring_wait_cqe(ring, &cqe); 248 + if (ret) { 249 + fprintf(stderr, "cqe wait: %d\n", ret); 250 + return T_EXIT_FAIL; 251 + } 252 + if (cqe->user_data == 1 && !(cqe->res == -ECANCELED || 253 + cqe->res == 0)) { 254 + fprintf(stderr, "cqe1 res: %d\n", 
cqe->res); 255 + return T_EXIT_FAIL; 256 + } 257 + if (cqe->user_data == 2 && 258 + !(cqe->res == 1 || cqe->res == 0 || cqe->res == -ENOENT || 259 + cqe->res == -EALREADY)) { 260 + fprintf(stderr, "cqe2 res: %d\n", cqe->res); 261 + return T_EXIT_FAIL; 262 + } 263 + io_uring_cqe_seen(ring, cqe); 264 + } 265 + 266 + return T_EXIT_PASS; 267 + } 268 + 269 + /* 270 + * Test basic reap of child exit 271 + */ 272 + static int test(struct io_uring *ring) 273 + { 274 + struct io_uring_sqe *sqe; 275 + struct io_uring_cqe *cqe; 276 + siginfo_t si; 277 + pid_t pid; 278 + int ret; 279 + 280 + pid = fork(); 281 + if (!pid) { 282 + child(100); 283 + exit(0); 284 + } 285 + 286 + sqe = io_uring_get_sqe(ring); 287 + io_uring_prep_waitid(sqe, P_PID, pid, &si, WEXITED, 0); 288 + 289 + io_uring_submit(ring); 290 + 291 + ret = io_uring_wait_cqe(ring, &cqe); 292 + if (ret) { 293 + fprintf(stderr, "cqe wait: %d\n", ret); 294 + return T_EXIT_FAIL; 295 + } 296 + 297 + /* no waitid support */ 298 + if (cqe->res == -EINVAL) { 299 + no_waitid = true; 300 + return T_EXIT_SKIP; 301 + } 302 + if (cqe->res < 0) { 303 + fprintf(stderr, "cqe res: %d\n", cqe->res); 304 + return T_EXIT_FAIL; 305 + } 306 + if (si.si_pid != pid) { 307 + fprintf(stderr, "expected pid %d, got %d\n", pid, si.si_pid); 308 + return T_EXIT_FAIL; 309 + } 310 + 311 + io_uring_cqe_seen(ring, cqe); 312 + return T_EXIT_PASS; 313 + } 314 + 315 + int main(int argc, char *argv[]) 316 + { 317 + struct io_uring ring; 318 + int ret, i; 319 + 320 + if (argc > 1) 321 + return T_EXIT_SKIP; 322 + 323 + io_uring_queue_init(8, &ring, 0); 324 + 325 + ret = test(&ring); 326 + if (ret == T_EXIT_FAIL) { 327 + fprintf(stderr, "test failed\n"); 328 + return T_EXIT_FAIL; 329 + } 330 + if (no_waitid) 331 + return T_EXIT_SKIP; 332 + 333 + ret = test_noexit(&ring); 334 + if (ret == T_EXIT_FAIL) { 335 + fprintf(stderr, "test_noexit failed\n"); 336 + return T_EXIT_FAIL; 337 + } 338 + 339 + ret = test_noexit(&ring); 340 + if (ret == T_EXIT_FAIL) { 341 + 
fprintf(stderr, "test_noexit failed\n"); 342 + return T_EXIT_FAIL; 343 + } 344 + 345 + ret = test_double(&ring); 346 + if (ret == T_EXIT_FAIL) { 347 + fprintf(stderr, "test_double failed\n"); 348 + return T_EXIT_FAIL; 349 + } 350 + 351 + ret = test_ready(&ring); 352 + if (ret == T_EXIT_FAIL) { 353 + fprintf(stderr, "test_ready failed\n"); 354 + return T_EXIT_FAIL; 355 + } 356 + 357 + ret = test_cancel(&ring); 358 + if (ret == T_EXIT_FAIL) { 359 + fprintf(stderr, "test_cancel failed\n"); 360 + return T_EXIT_FAIL; 361 + } 362 + 363 + for (i = 0; i < 1000; i++) { 364 + ret = test_cancel_race(&ring, i & 1); 365 + if (ret == T_EXIT_FAIL) { 366 + fprintf(stderr, "test_cancel_race failed\n"); 367 + return T_EXIT_FAIL; 368 + } 369 + } 370 + 371 + io_uring_queue_exit(&ring); 372 + return T_EXIT_PASS; 373 + }

+146

vendor/liburing/test/wq-aff.c

··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Description: test that io-wq affinity is correctly set for SQPOLL 4 + */ 5 + #include <stdio.h> 6 + #include <unistd.h> 7 + #include <fcntl.h> 8 + #include <stdlib.h> 9 + #include <string.h> 10 + 11 + #include "liburing.h" 12 + #include "helpers.h" 13 + 14 + #define IOWQ_CPU 0 15 + #define SQPOLL_CPU 1 16 + 17 + static int verify_comm(pid_t pid, const char *name, int cpu) 18 + { 19 + char comm[64], buf[64]; 20 + cpu_set_t set; 21 + int fd, ret; 22 + 23 + sprintf(comm, "/proc/%d/comm", pid); 24 + fd = open(comm, O_RDONLY); 25 + if (fd < 0) { 26 + perror("open"); 27 + return T_EXIT_SKIP; 28 + } 29 + 30 + ret = read(fd, buf, sizeof(buf)); 31 + if (ret < 0) { 32 + close(fd); 33 + return T_EXIT_SKIP; 34 + } 35 + 36 + if (strncmp(buf, name, strlen(name) - 1)) { 37 + close(fd); 38 + return T_EXIT_SKIP; 39 + } 40 + 41 + close(fd); 42 + 43 + ret = sched_getaffinity(pid, sizeof(set), &set); 44 + if (ret < 0) { 45 + perror("sched_getaffinity"); 46 + return T_EXIT_SKIP; 47 + } 48 + 49 + if (CPU_COUNT(&set) != 1) { 50 + fprintf(stderr, "More than one CPU set in mask\n"); 51 + return T_EXIT_FAIL; 52 + } 53 + if (!CPU_ISSET(cpu, &set)) { 54 + fprintf(stderr, "Wrong CPU set in mask\n"); 55 + return T_EXIT_FAIL; 56 + } 57 + 58 + return T_EXIT_PASS; 59 + } 60 + 61 + static int verify_affinity(pid_t pid, int sqpoll) 62 + { 63 + pid_t wq_pid, sqpoll_pid = -1; 64 + char name[64]; 65 + int ret; 66 + 67 + wq_pid = pid + 2; 68 + if (sqpoll) 69 + sqpoll_pid = pid + 1; 70 + 71 + /* verify we had the pids right */ 72 + sprintf(name, "iou-wrk-%d", pid); 73 + ret = verify_comm(wq_pid, name, IOWQ_CPU); 74 + if (ret != T_EXIT_PASS) 75 + return ret; 76 + 77 + if (sqpoll_pid != -1) { 78 + sprintf(name, "iou-sqp-%d", pid); 79 + ret = verify_comm(sqpoll_pid, name, SQPOLL_CPU); 80 + if (ret != T_EXIT_PASS) 81 + return ret; 82 + } 83 + 84 + return T_EXIT_PASS; 85 + } 86 + 87 + static int test(int sqpoll) 88 + { 89 + struct io_uring_params p = { 
}; 90 + struct io_uring ring; 91 + struct io_uring_sqe *sqe; 92 + char buf[64]; 93 + int fds[2], ret; 94 + cpu_set_t set; 95 + 96 + if (sqpoll) { 97 + p.flags = IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF; 98 + p.sq_thread_cpu = SQPOLL_CPU; 99 + } 100 + 101 + io_uring_queue_init_params(8, &ring, &p); 102 + 103 + CPU_ZERO(&set); 104 + CPU_SET(IOWQ_CPU, &set); 105 + 106 + ret = io_uring_register_iowq_aff(&ring, sizeof(set), &set); 107 + if (ret) { 108 + fprintf(stderr, "register aff: %d\n", ret); 109 + return T_EXIT_FAIL; 110 + } 111 + 112 + if (pipe(fds) < 0) { 113 + perror("pipe"); 114 + return T_EXIT_FAIL; 115 + } 116 + 117 + sqe = io_uring_get_sqe(&ring); 118 + io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0); 119 + sqe->flags |= IOSQE_ASYNC; 120 + 121 + io_uring_submit(&ring); 122 + 123 + usleep(10000); 124 + 125 + ret = verify_affinity(getpid(), sqpoll); 126 + io_uring_queue_exit(&ring); 127 + return ret; 128 + } 129 + 130 + int main(int argc, char *argv[]) 131 + { 132 + int ret; 133 + 134 + if (argc > 1) 135 + return T_EXIT_SKIP; 136 + 137 + ret = test(1); 138 + if (ret == T_EXIT_SKIP) { 139 + return T_EXIT_SKIP; 140 + } else if (ret != T_EXIT_PASS) { 141 + fprintf(stderr, "test sqpoll failed\n"); 142 + return T_EXIT_FAIL; 143 + } 144 + 145 + return T_EXIT_PASS; 146 + }

+38 -25

vendor/liburing/test/xattr.c

··· 294 294 /* Test driver for failure cases of fsetxattr and fgetxattr. */ 295 295 static int test_failure_fxattr(void) 296 296 { 297 - int rc = 0; 298 297 struct io_uring ring; 299 298 char value[XATTR_SIZE]; 300 299 ··· 313 312 } 314 313 315 314 /* Test writing attributes. */ 316 - assert(io_uring_fsetxattr(&ring, -1, KEY1, VALUE1, strlen(VALUE1), 0) < 0); 317 - assert(io_uring_fsetxattr(&ring, fd, NULL, VALUE1, strlen(VALUE1), 0) < 0); 318 - assert(io_uring_fsetxattr(&ring, fd, KEY1, NULL, strlen(VALUE1), 0) < 0); 319 - assert(io_uring_fsetxattr(&ring, fd, KEY1, VALUE1, 0, 0) == 0); 320 - assert(io_uring_fsetxattr(&ring, fd, KEY1, VALUE1, -1, 0) < 0); 315 + if (io_uring_fsetxattr(&ring, -1, KEY1, VALUE1, strlen(VALUE1), 0) >= 0) 316 + return 1; 317 + if (io_uring_fsetxattr(&ring, fd, NULL, VALUE1, strlen(VALUE1), 0) >= 0) 318 + return 1; 319 + if (io_uring_fsetxattr(&ring, fd, KEY1, NULL, strlen(VALUE1), 0) >= 0) 320 + return 1; 321 + if (io_uring_fsetxattr(&ring, fd, KEY1, VALUE1, 0, 0) != 0) 322 + return 1; 323 + if (io_uring_fsetxattr(&ring, fd, KEY1, VALUE1, -1, 0) >= 0) 324 + return 1; 321 325 322 326 /* Test reading attributes. */ 323 - assert(io_uring_fgetxattr(&ring, -1, KEY1, value, XATTR_SIZE) < 0); 324 - assert(io_uring_fgetxattr(&ring, fd, NULL, value, XATTR_SIZE) < 0); 325 - assert(io_uring_fgetxattr(&ring, fd, KEY1, value, 0) == 0); 327 + if (io_uring_fgetxattr(&ring, -1, KEY1, value, XATTR_SIZE) >= 0) 328 + return 1; 329 + if (io_uring_fgetxattr(&ring, fd, NULL, value, XATTR_SIZE) >= 0) 330 + return 1; 331 + if (io_uring_fgetxattr(&ring, fd, KEY1, value, 0) != 0) 332 + return 1; 326 333 327 334 /* Cleanup. */ 328 335 close(fd); 329 336 unlink(FILENAME); 330 - 331 337 io_uring_queue_exit(&ring); 332 - 333 - return rc; 338 + return 0; 334 339 } 335 340 336 341 337 342 /* Test driver for failure cases for setxattr and getxattr. 
*/ 338 343 static int test_failure_xattr(void) 339 344 { 340 - int rc = 0; 341 345 struct io_uring ring; 342 346 char value[XATTR_SIZE]; 343 347 ··· 352 356 t_create_file(FILENAME, 0); 353 357 354 358 /* Test writing attributes. */ 355 - assert(io_uring_setxattr(&ring, "complete garbage", KEY1, VALUE1, strlen(VALUE1), 0) < 0); 356 - assert(io_uring_setxattr(&ring, NULL, KEY1, VALUE1, strlen(VALUE1), 0) < 0); 357 - assert(io_uring_setxattr(&ring, FILENAME, NULL, VALUE1, strlen(VALUE1), 0) < 0); 358 - assert(io_uring_setxattr(&ring, FILENAME, KEY1, NULL, strlen(VALUE1), 0) < 0); 359 - assert(io_uring_setxattr(&ring, FILENAME, KEY1, VALUE1, 0, 0) == 0); 359 + if (io_uring_setxattr(&ring, "complete garbage", KEY1, VALUE1, strlen(VALUE1), 0) >= 0) 360 + return 1; 361 + if (io_uring_setxattr(&ring, NULL, KEY1, VALUE1, strlen(VALUE1), 0) >= 0) 362 + return 1; 363 + if (io_uring_setxattr(&ring, FILENAME, NULL, VALUE1, strlen(VALUE1), 0) >= 0) 364 + return 1; 365 + if (io_uring_setxattr(&ring, FILENAME, KEY1, NULL, strlen(VALUE1), 0) >= 0) 366 + return 1; 367 + if (io_uring_setxattr(&ring, FILENAME, KEY1, VALUE1, 0, 0) != 0) 368 + return 1; 360 369 361 370 /* Test reading attributes. 
*/ 362 - assert(io_uring_getxattr(&ring, "complete garbage", KEY1, value, XATTR_SIZE) < 0); 363 - assert(io_uring_getxattr(&ring, NULL, KEY1, value, XATTR_SIZE) < 0); 364 - assert(io_uring_getxattr(&ring, FILENAME, NULL, value, XATTR_SIZE) < 0); 365 - assert(io_uring_getxattr(&ring, FILENAME, KEY1, NULL, XATTR_SIZE) == 0); 366 - assert(io_uring_getxattr(&ring, FILENAME, KEY1, value, 0) == 0); 371 + if (io_uring_getxattr(&ring, "complete garbage", KEY1, value, XATTR_SIZE) >= 0) 372 + return 1; 373 + if (io_uring_getxattr(&ring, NULL, KEY1, value, XATTR_SIZE) >= 0) 374 + return 1; 375 + if (io_uring_getxattr(&ring, FILENAME, NULL, value, XATTR_SIZE) >= 0) 376 + return 1; 377 + if (io_uring_getxattr(&ring, FILENAME, KEY1, NULL, XATTR_SIZE) != 0) 378 + return 1; 379 + if (io_uring_getxattr(&ring, FILENAME, KEY1, value, 0) != 0) 380 + return 1; 367 381 368 382 /* Cleanup. */ 369 383 io_uring_queue_exit(&ring); 370 384 unlink(FILENAME); 371 - 372 - return rc; 385 + return 0; 373 386 } 374 387 375 388 /* Test for invalid SQE, this will cause a segmentation fault if enabled. */

Configure Feed

Configure Feed