···11+## v2.7.0
22+33+- Fix use-after-free data race in CQE handlers (@avsm #124, reported #123 by @polytypic).
44+55+- Add socket bind and listen operations (@avsm #118).
66+77+- Update liburing to 2.7 (@avsm #118).
88+99+- Add `Uring.Setup_flags` module (@talex5 #122).
1010+1111+- Clean up configurator code and update primitives.h (@talex5 #121).
1212+1313+- Add `mkdirat` operation (@patricoferris #120).
1414+115## v0.9
216317- Fix statx constant fallback values, and fix with musl 1.2.5 (@alyssais #114).
41855-- Add Uring.sqe_ready (@talex5 #115).
1919+- Add `Uring.sqe_ready` (@talex5 #115).
620721## v0.8
822
+210-35
lib/uring/uring.mli
···8484val create : ?flags:Setup_flags.t -> ?polling_timeout:int -> queue_depth:int -> unit -> 'a t
8585(** [create ~queue_depth] will return a fresh Io_uring structure [t].
8686 Initially, [t] has no fixed buffer. Use {!set_fixed_buffer} if you want one.
8787+8888+ The [queue_depth] determines the size of the submission queue (SQ) and completion
8989+ queue (CQ) rings. The kernel may round this up to the next power of 2. The actual
9090+ size allocated can be checked with {!queue_depth}.
9191+9292+ @param flags Setup flags to configure ring behavior (see {!Setup_flags})
8793 @param polling_timeout If given, use polling mode with the given idle timeout (in ms).
8888- This requires privileges. *)
9494+ This requires elevated privileges and enables {!Setup_flags.sqpoll}.
9595+ @raise Unix.Unix_error if the io_uring_setup system call fails *)
89969097val queue_depth : 'a t -> int
9198(** [queue_depth t] returns the total number of submission slots for the uring [t] *)
929993100val exit : 'a t -> unit
94101(** [exit t] will shut down the uring [t]. Any subsequent requests will fail.
9595- @raise Invalid_argument if there are any requests in progress *)
102102+103103+ This closes the io_uring file descriptor and unmaps the memory rings.
104104+ After calling this, the ring cannot be used again.
105105+106106+ @raise Invalid_argument if there are any requests in progress. Check with
107107+ {!active_ops} to ensure all operations have completed. *)
9610897109(** {2 Fixed buffers}
98110···103115val set_fixed_buffer : 'a t -> Cstruct.buffer -> (unit, [> `ENOMEM]) result
104116(** [set_fixed_buffer t buf] sets [buf] as the fixed buffer for [t].
105117106106- You will normally want to wrap this with {!Region.alloc} or similar
107107- to divide the buffer into chunks.
118118+ Fixed buffers allow zero-copy I/O operations using {!read_fixed} and {!write_fixed}.
119119+ The kernel pins the buffer in memory, avoiding the need to map user pages for each I/O.
120120+ You will normally want to wrap this with {!Region.alloc} or similar to divide the
121121+ buffer into chunks.
108122109123 If [t] already has a buffer set, the old one will be removed.
110124111111- Returns [`ENOMEM] if insufficient kernel resources are available
112112- or the caller's RLIMIT_MEMLOCK resource limit would be exceeded.
113113-125125+ @return [Ok ()] on success, or [Error `ENOMEM] if:
126126+ - Insufficient kernel resources are available
127127+ - The caller's RLIMIT_MEMLOCK resource limit would be exceeded
128128+ - The buffer is too large to pin in memory
114129 @raise Invalid_argument if there are any requests in progress *)
115130116131val buf : 'a t -> Cstruct.buffer
···121136122137val noop : 'a t -> 'a -> 'a job option
123138(** [noop t d] submits a no-op operation to uring [t]. The user data [d] will be
124124- returned by {!wait} or {!peek} upon completion. *)
139139+ returned by {!wait} or {!get_cqe_nonblocking} upon completion.
140140+141141+ This operation does nothing but can be useful for testing the ring, waking up
142142+ a thread waiting on completions, or as a barrier when used with IO_LINK.
143143+ The completion will have [result = 0] on success.
144144+145145+ @return [None] if the submission queue is full; otherwise [Some job] *)
125146126147(** {2 Timeout} *)
127148···133154val timeout: ?absolute:bool -> 'a t -> clock -> int64 -> 'a -> 'a job option
134155(** [timeout t clock ns d] submits a timeout request to uring [t].
135156136136- [absolute] denotes how [clock] and [ns] relate to one another. Default value is [false]
157157+ The timeout will trigger after the specified time has elapsed. When the timeout
158158+ expires, the completion's [result] will be negative with an [ETIME] error indicating
159159+ timeout. The timeout can be cancelled using {!cancel} before it triggers.
137160138138- [ns] is the timeout time in nanoseconds *)
161161+ @param absolute If [false] (default), [ns] is relative to the current time.
162162+ If [true], [ns] is an absolute time value according to [clock]
163163+ @param clock The clock source: {!Boottime} (suspend-aware) or {!Realtime} (wall-clock)
164164+ @param ns The timeout duration in nanoseconds (relative) or absolute time
165165+ @return [None] if the submission queue is full; otherwise [Some job] *)
139166140167(** Flags that can be passed to {!openat2}. *)
141168module Open_flags : sig
···336363337364val poll_add : 'a t -> Unix.file_descr -> Poll_mask.t -> 'a -> 'a job option
338365(** [poll_add t fd mask d] will submit a [poll(2)] request to uring [t].
339339- It completes and returns [d] when an event in [mask] is ready on [fd]. *)
366366+ It completes and returns [d] when an event in [mask] is ready on [fd].
367367+ This is an asynchronous version of poll(2). The operation will complete when
368368+ any of the requested events occur on the file descriptor.
369369+370370+ The completion's [result] field contains:
371371+ - On success: The bitwise OR of events that occurred (normally a subset of [mask]; error and hang-up conditions may be reported even if not requested)
372372+ - On error: A negative error code
373373+374374+ @param fd File descriptor to monitor
375375+ @param mask Bitwise OR of events to monitor (see {!Poll_mask})
376376+ @return [None] if the submission queue is full; otherwise [Some job] *)
340377341378type offset := Optint.Int63.t
342379(** For files, give the absolute offset, or use [Optint.Int63.minus_one] for the current position.
···346383(** [read t ~file_offset fd buf d] will submit a [read(2)] request to uring [t].
347384 It reads from absolute [file_offset] on the [fd] file descriptor and writes
348385 the results into the memory pointed to by [buf]. The user data [d] will
349349- be returned by {!wait} or {!peek} upon completion. *)
386386+ be returned by {!wait} or {!get_cqe_nonblocking} upon completion.
387387+388388+ The completion's [result] field contains the number of bytes read on success,
389389+ 0 for end-of-file, or a negative error code on failure.
390390+391391+ @param file_offset Use {!Optint.Int63.minus_one} for current file position,
392392+ or a specific offset for files. For sockets, use {!Optint.Int63.zero}
393393+ @return [None] if the submission queue is full; otherwise [Some job] *)
350394351395val write : 'a t -> file_offset:offset -> Unix.file_descr -> Cstruct.t -> 'a -> 'a job option
352396(** [write t ~file_offset fd buf d] will submit a [write(2)] request to uring [t].
353397 It writes to absolute [file_offset] on the [fd] file descriptor from the
354398 memory pointed to by [buf]. The user data [d] will be returned by
355355- {!wait} or {!peek} upon completion. *)
399399+ {!wait} or {!get_cqe_nonblocking} upon completion.
400400+401401+ The completion's [result] field contains the number of bytes written on success,
402402+ or a negative error code on failure. Note that a short write (less than the
403403+ buffer size) is not an error.
404404+405405+ @param file_offset Use {!Optint.Int63.minus_one} for current file position,
406406+ or a specific offset for files. For sockets, use {!Optint.Int63.zero}
407407+ @return [None] if the submission queue is full; otherwise [Some job] *)
356408357409val iov_max : int
358358-(** The maximum length of the list that can be passed to [readv] and similar. *)
410410+(** The maximum length of the list that can be passed to {!readv} and {!writev}. *)
359411360412val readv : 'a t -> file_offset:offset -> Unix.file_descr -> Cstruct.t list -> 'a -> 'a job option
361413(** [readv t ~file_offset fd iov d] will submit a [readv(2)] request to uring [t].
362414 It reads from absolute [file_offset] on the [fd] file descriptor and writes
363415 the results into the memory pointed to by [iov]. The user data [d] will
364364- be returned by {!wait} or {!peek} upon completion.
416416+ be returned by {!wait} or {!get_cqe_nonblocking} upon completion.
417417+418418+ This performs a vectored read, reading data into multiple buffers in a single
419419+ operation. The completion's [result] field contains the total number of bytes
420420+ read across all buffers, or a negative error code.
365421366366- Requires [List.length iov <= Uring.iov_max] *)
422422+ @param file_offset File offset (see {!type:offset} for special values)
423423+ @param iov List of buffers to read into
424424+ @return [None] if the submission queue is full; otherwise [Some job]
425425+ @raise Invalid_argument if [List.length iov > Uring.iov_max] *)
367426368427val writev : 'a t -> file_offset:offset -> Unix.file_descr -> Cstruct.t list -> 'a -> 'a job option
369428(** [writev t ~file_offset fd iov d] will submit a [writev(2)] request to uring [t].
370429 It writes to absolute [file_offset] on the [fd] file descriptor from the
371430 memory pointed to by [iov]. The user data [d] will be returned by
372372- {!wait} or {!peek} upon completion.
431431+ {!wait} or {!get_cqe_nonblocking} upon completion.
432432+433433+ This performs a vectored write, writing data from multiple buffers in a single
434434+ operation. The completion's [result] field contains the total number of bytes
435435+ written from all buffers, or a negative error code.
373436374374- Requires [List.length iov <= Uring.iov_max] *)
437437+ @param file_offset File offset (see {!type:offset} for special values)
438438+ @param iov List of buffers to write from
439439+ @return [None] if the submission queue is full; otherwise [Some job]
440440+ @raise Invalid_argument if [List.length iov > Uring.iov_max] *)
375441376442val read_fixed : 'a t -> file_offset:offset -> Unix.file_descr -> off:int -> len:int -> 'a -> 'a job option
377443(** [read_fixed t ~file_offset fd ~off ~len d] will submit a [read(2)] request to uring [t].
···395461396462val splice : 'a t -> src:Unix.file_descr -> dst:Unix.file_descr -> len:int -> 'a -> 'a job option
397463(** [splice t ~src ~dst ~len d] will submit a request to copy [len] bytes from [src] to [dst].
398398- The operation returns the number of bytes transferred, or 0 for end-of-input.
399399- The result is [EINVAL] if the file descriptors don't support splicing. *)
464464+465465+ This is a zero-copy data transfer between two file descriptors. At least one
466466+ must be a pipe. Data is moved without copying between kernel and user space.
467467+468468+ The completion's [result] field contains the number of bytes transferred on success,
469469+ 0 for end-of-input, or a negative error code.
470470+471471+ @param src Source file descriptor (can be a regular file or pipe)
472472+ @param dst Destination file descriptor (can be a regular file or pipe)
473473+ @param len Maximum number of bytes to transfer
474474+ @return [None] if the submission queue is full; otherwise [Some job] *)
400475401476module Statx : sig
402477 type t
···596671 (or the current directory if [fd] is not given). *)
597672598673val bind : 'a t -> Unix.file_descr -> Unix.sockaddr -> 'a -> 'a job option
599599-(** [bind t fd addr d] will submit a request to bind [fd] to [addr]. *)
674674+(** [bind t fd addr d] will submit a request to bind socket [fd] to network address [addr].
675675+676676+ This is an asynchronous version of bind(2). The socket should typically be created
677677+ with [Unix.SOCK_NONBLOCK] to work well with io_uring. The completion will have
678678+ [result = 0] on success, or a negative error code on failure.
679679+680680+ @return [None] if the submission queue is full; otherwise [Some job] *)
600681601682val listen : 'a t -> Unix.file_descr -> int -> 'a -> 'a job option
602602-(** [listen t fd backlog d] will submit a request to listen on [fd] with [backlog] maximum pending connections. *)
683683+(** [listen t fd backlog d] will submit a request to mark socket [fd] as passive,
684684+ ready to accept incoming connections.
685685+686686+ This is an asynchronous version of listen(2). The [backlog] parameter defines
687687+ the maximum length of the queue of pending connections. If a connection request
688688+ arrives when the queue is full, the client may receive an ECONNREFUSED error.
689689+ The completion will have [result = 0] on success.
690690+691691+ @param fd Socket file descriptor (must be already bound with {!bind})
692692+ @param backlog Maximum number of pending connections (often capped by system limits)
693693+ @return [None] if the submission queue is full; otherwise [Some job]
694694+ @raise Invalid_argument if [fd] is not a socket *)
603695604696val connect : 'a t -> Unix.file_descr -> Unix.sockaddr -> 'a -> 'a job option
605605-(** [connect t fd addr d] will submit a request to connect [fd] to [addr]. *)
697697+(** [connect t fd addr d] will submit a request to connect socket [fd] to [addr].
698698+699699+ This is an asynchronous version of connect(2). For non-blocking sockets,
700700+ the operation may initially return an error indicating the connection is
701701+ in progress, then completes with [result = 0] when established or a
702702+ negative error code on failure.
703703+704704+ @return [None] if the submission queue is full; otherwise [Some job] *)
606705607706(** Holder for the peer's address in {!accept}. *)
608707module Sockaddr : sig
···615714val accept : 'a t -> Unix.file_descr -> Sockaddr.t -> 'a -> 'a job option
616715(** [accept t fd addr d] will submit a request to accept a new connection on [fd].
617716 The new FD will be configured with [SOCK_CLOEXEC].
618618- The remote address will be stored in [addr]. *)
717717+ The remote address will be stored in [addr].
718718+719719+ This is an asynchronous version of accept4(2) with SOCK_CLOEXEC flag.
720720+ The completion's [result] field contains the new file descriptor on success,
721721+ or a negative error code on failure.
722722+723723+ @param fd Listening socket (must have called {!listen} first)
724724+ @param addr Pre-allocated storage for the peer address (create with {!Sockaddr.create})
725725+ @return [None] if the submission queue is full; otherwise [Some job] *)
619726620727val close : 'a t -> Unix.file_descr -> 'a -> 'a job option
728728+(** [close t fd d] will submit a request to close file descriptor [fd].
729729+730730+ This is an asynchronous version of close(2). The completion's [result]
731731+ field will be 0 on success or a negative error code.
732732+733733+ Note: Even on error, the file descriptor is considered closed and should
734734+ not be used again. The descriptor will not be reused until the operation
735735+ completes.
736736+737737+ @return [None] if the submission queue is full; otherwise [Some job] *)
621738622739val cancel : 'a t -> 'a job -> 'a -> 'a job option
623740(** [cancel t job d] submits a request to cancel [job].
624624- The cancel job itself returns 0 on success, or [ENOTFOUND]
625625- if [job] had already completed by the time the kernel processed the cancellation request.
626626- @raise Invalid_argument if the job has already been returned by e.g. {!wait}. *)
741741+742742+ Cancellation is asynchronous - the original operation may still complete
743743+ before the cancellation takes effect. Both the original operation and the
744744+ cancel operation will generate completion events.
745745+746746+ @param job The job handle returned when the operation was submitted
747747+ @return [None] if the submission queue is full; otherwise [Some cancel_job]
748748+ @raise Invalid_argument if the job has already been collected by {!wait} or {!get_cqe_nonblocking} *)
627749628750module Msghdr : sig
629751 type t
···645767(** [send_msg t fd buffs d] will submit a [sendmsg(2)] request. The [Msghdr] will be constructed
646768 from the FDs ([fds]), address ([dst]) and buffers ([buffs]).
647769648648- Requires [List.length buffs <= Uring.iov_max]
770770+ This is useful for:
771771+ - Sending to unconnected sockets (UDP) with [dst]
772772+ - Sending file descriptors over Unix domain sockets with [fds]
773773+ - Scatter-gather I/O with multiple buffers
649774650650- @param dst Destination address.
651651- @param fds Extra file descriptors to attach to the message. *)
775775+ The completion's [result] field contains the number of bytes sent on success,
776776+ or a negative error code.
777777+778778+ @param dst Destination address for unconnected sockets
779779+ @param fds File descriptors to send via SCM_RIGHTS (Unix domain sockets only)
780780+ @return [None] if the submission queue is full; otherwise [Some job]
781781+ @raise Invalid_argument if [List.length buffs > Uring.iov_max] *)
652782653783val recv_msg : 'a t -> Unix.file_descr -> Msghdr.t -> 'a -> 'a job option
654784(** [recv_msg t fd msghdr d] will submit a [recvmsg(2)] request. If the request is
655655- successful then the [msghdr] will contain the sender address and the data received. *)
785785+ successful then the [msghdr] will contain the sender address and the data received.
786786+787787+ This is useful for:
788788+ - Receiving from unconnected sockets (UDP) - sender address is stored
789789+ - Receiving file descriptors over Unix domain sockets
790790+ - Scatter-gather I/O with multiple buffers
791791+792792+ The completion's [result] field contains the number of bytes received on success,
793793+ or a negative error code. Use {!Msghdr.get_fds} to retrieve any received file
794794+ descriptors.
795795+796796+ @param msghdr Pre-allocated message header created with {!Msghdr.create}
797797+ @return [None] if the submission queue is full; otherwise [Some job] *)
656798657799val fsync : 'a t -> ?off:int64 -> ?len:int -> Unix.file_descr -> 'a -> 'a job option
658800(** [fsync t ?off ?len fd d] will submit an [fsync(2)] request, with the optional
659801 offset [off] and length [len] specifying the subset of the file to perform the
660660- synchronisation on. *)
802802+ synchronisation on.
803803+804804+ This ensures that all file data and metadata are durably stored on disk.
805805+ The completion's [result] field will be 0 on success or a negative error code.
806806+807807+ @param off Starting offset for sync range (requires kernel 5.2+)
808808+ @param len Length of range to sync; if both [off] and [len] are given,
809809+ only that range is synced (requires kernel 5.2+)
810810+ @return [None] if the submission queue is full; otherwise [Some job] *)
661811662812val fdatasync : 'a t -> ?off:int64 -> ?len:int -> Unix.file_descr -> 'a -> 'a job option
663813(** [fdatasync t ?off ?len fd d] will submit an [fdatasync(2)] request, with the optional
664814 offset [off] and length [len] specifying the subset of the file to perform the
665665- synchronisation on. *)
815815+ synchronisation on.
816816+817817+ Like {!fsync} but only ensures file data (not metadata) is durably stored.
818818+ This can be more efficient when file metadata (permissions, timestamps) hasn't changed.
819819+ The completion's [result] field will be 0 on success or a negative error code.
820820+821821+ @param off Starting offset for sync range (requires kernel 5.2+)
822822+ @param len Length of range to sync
823823+ @return [None] if the submission queue is full; otherwise [Some job] *)
666824667825(** {2 Probing}
668826···704862705863val register_eventfd : 'a t -> Unix.file_descr -> unit
706864(** [register_eventfd t fd] will register an eventfd to the uring [t].
707707- See documentation for io_uring_register_eventfd *)
865865+866866+ When a completion event is posted to the CQ ring, the eventfd will be signaled.
867867+ This allows integration with event loops like epoll/select. The eventfd should
868868+ be created with [Unix.eventfd] or similar.
869869+870870+ Only one eventfd can be registered per ring. Registering a new one replaces
871871+ the previous registration.
872872+873873+ @param fd An eventfd file descriptor
874874+ @raise Unix.Unix_error on registration failure *)
708875709876val error_of_errno : int -> Unix.error
710877(** [error_of_errno e] converts the error code [abs e] to a Unix error type. *)
711878712879val active_ops : _ t -> int
713880(** [active_ops t] returns the number of operations added to the ring (whether submitted or not)
714714- for which the completion event has not yet been collected. *)
881881+ for which the completion event has not yet been collected.
882882+883883+ This is useful for:
884884+ - Ensuring all operations complete before calling {!exit}
885885+ - Monitoring ring utilization
886886+ - Detecting potential ring overflow conditions
887887+888888+ The count includes operations that are queued but not submitted, submitted
889889+ but not completed, and completed but not collected via {!wait} or {!get_cqe_nonblocking}. *)
715890716891val sqe_ready : _ t -> int
717892(** [sqe_ready t] is the number of unconsumed (if SQPOLL) or unsubmitted entries in the SQ ring. *)