Pure OCaml B-tree implementation for persistent storage
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

chore: accumulated lint fixes, test stubs, and interface files

- Fix E325 to skip type variables when checking get/find naming
- Add test.ml runners for bpsec, bytesrw-eio, cfdp, claude-skills
- Add .mli files for btree lib modules and test modules
- Add .mli files for cbort, cgr, bundle, bpsec, bytesrw-eio
- Add claudeio test module stubs and .mli files
- Add claudeio test/proto/dune for outgoing tests
- Fix claudeio examples: replace Test_json_utils references with Json_utils
- Add LinkedIn URL parsing module and tests
- Improve LinkedIn profile scraping and cookie handling
- Fix claude-skills main.ml lint issues
- Fix various .mli doc comment formatting

+290
+55
lib/cell.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** B-tree cell parsing and encoding. *) 7 + 8 + type table_leaf = { 9 + rowid : int64; 10 + payload : string; 11 + overflow_page : int option; 12 + } 13 + (** Table leaf cell: rowid + payload. *) 14 + 15 + type table_interior = { left_child : int; rowid : int64 } 16 + (** Table interior cell: child page + rowid. *) 17 + 18 + type index_leaf = { payload : string; overflow_page : int option } 19 + (** Index leaf cell: payload only. *) 20 + 21 + type index_interior = { 22 + left_child : int; 23 + payload : string; 24 + overflow_page : int option; 25 + } 26 + (** Index interior cell: child page + payload. *) 27 + 28 + val max_local : usable_size:int -> is_table:bool -> int (** [max_local ~usable_size ~is_table] computes an [int] from the usable page size and the tree kind. NOTE(review): presumably the largest payload size kept inside the cell before spilling to an overflow page, as in the SQLite file format - confirm against the implementation. *) 29 + val min_local : usable_size:int -> int (** [min_local ~usable_size] computes an [int] from the usable page size. NOTE(review): presumably the minimum number of payload bytes kept in the cell once a payload overflows - confirm against the implementation. *) 30 + 31 + val parse_table_leaf : string -> int -> usable_size:int -> table_leaf * int 32 + (** [parse_table_leaf buf off ~usable_size] parses a table leaf cell. Returns 33 + [(cell, bytes_consumed)]. *) 34 + 35 + val parse_table_interior : string -> int -> table_interior * int 36 + (** [parse_table_interior buf off] parses a table interior cell. NOTE(review): the [int] in the result is presumably the number of bytes consumed, as for [parse_table_leaf] - confirm. *) 37 + 38 + val parse_index_leaf : string -> int -> usable_size:int -> index_leaf * int 39 + (** [parse_index_leaf buf off ~usable_size] parses an index leaf cell. *) 40 + 41 + val parse_index_leaf_raw : 42 + string -> int -> usable_size:int -> int * string * int option * int 43 + (** [parse_index_leaf_raw buf off ~usable_size] returns 44 + [(payload_size, local_payload, overflow_page, bytes_consumed)]. *) 45 + 46 + val parse_index_interior : 47 + string -> int -> usable_size:int -> index_interior * int 48 + (** [parse_index_interior buf off ~usable_size] parses an index interior cell. 
49 + *) 50 + 51 + val parse_index_interior_raw : 52 + string -> int -> usable_size:int -> int * int * string * int option * int 53 + (** [parse_index_interior_raw buf off ~usable_size] returns 54 + [(left_child, payload_size, local_payload, overflow_page, bytes_consumed)]. 55 + *)
+41
lib/index.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Index B-tree for SQLite key storage. *) 7 + 8 + type t 9 + (** An index B-tree for string keys. *) 10 + 11 + val create : Pager.t -> t 12 + (** [create pager] creates a new empty index B-tree. *) 13 + 14 + val open_ : Pager.t -> root_page:int -> t 15 + (** [open_ pager ~root_page] opens an existing index B-tree. *) 16 + 17 + val root_page : t -> int 18 + (** [root_page t] returns the root page number. *) 19 + 20 + val mem : t -> string -> bool 21 + (** [mem t key] returns true if [key] exists in the index. *) 22 + 23 + val find : t -> string -> string option 24 + (** [find t key] returns the payload for [key] if it exists. NOTE(review): presumably [None] when [key] is absent, given the [option] result - confirm. *) 25 + 26 + val insert : t -> string -> unit 27 + (** [insert t key] inserts a key. If the key already exists, this is a no-op 28 + (set semantics). *) 29 + 30 + val delete : t -> string -> unit 31 + (** [delete t key] removes a key. NOTE(review): behaviour when [key] is absent is not stated here - confirm whether it is a no-op or raises. *) 32 + 33 + val find_by_prefix : t -> string -> string option 34 + (** [find_by_prefix t prefix] finds the first entry starting with [prefix]. NOTE(review): first presumably refers to the sorted order used by [iter] - confirm. *) 35 + 36 + val delete_by_prefix : t -> string -> unit 37 + (** [delete_by_prefix t prefix] deletes the first entry starting with [prefix]. 38 + NOTE(review): behaviour when no entry matches is not stated here - confirm. *) 39 + 40 + val iter : t -> (string -> unit) -> unit 41 + (** [iter t f] calls [f key] for each key in sorted order. *)
+66
lib/page.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** B-tree page types and header parsing. *) 7 + 8 + type page_type = 9 + | Interior_index (** 0x02 *) 10 + | Interior_table (** 0x05 *) 11 + | Leaf_index (** 0x0a *) 12 + | Leaf_table (** 0x0d *) 13 + 14 + val pp_page_type : Format.formatter -> page_type -> unit (** [pp_page_type ppf typ] is a [Format] printer for [page_type] values. *) 15 + val page_type_of_byte : int -> page_type (** [page_type_of_byte b] maps an [int] to a [page_type]. NOTE(review): presumably accepts the byte values listed on the constructors (0x02, 0x05, 0x0a, 0x0d); behaviour on any other value is not visible here - confirm. *) 16 + val byte_of_page_type : page_type -> int (** [byte_of_page_type typ] maps a [page_type] to an [int]. NOTE(review): presumably the byte value listed on each constructor - confirm. *) 17 + val is_interior : page_type -> bool (** [is_interior typ] is a predicate on page types. NOTE(review): presumably true exactly for [Interior_index] and [Interior_table] - confirm. *) 18 + 19 + val header_size : page_type -> int 20 + (** [header_size typ] is 8 for leaf pages, 12 for interior pages. *) 21 + 22 + type header = { 23 + page_type : page_type; 24 + first_freeblock : int; 25 + cell_count : int; 26 + cell_content_start : int; 27 + fragmented_bytes : int; 28 + right_child : int option; (** Interior pages only *) 29 + } 30 + (** Page header. *) 31 + 32 + val parse_header : string -> int -> header 33 + (** [parse_header buf off] parses a page header starting at [off]. *) 34 + 35 + val free_space : header -> page_type:page_type -> int 36 + (** [free_space header ~page_type] returns the free space in the page. NOTE(review): [header] already carries a [page_type] field; confirm whether the labelled argument may differ from it or is redundant. *) 37 + 38 + val init : page_size:int -> page_type:page_type -> bytes 39 + (** [init ~page_size ~page_type] creates a new empty page buffer. *) 40 + 41 + (** {1 Binary helpers} *) 42 + 43 + val get_u16_be : string -> int -> int (** NOTE(review): presumably [get_u16_be buf off] reads an unsigned big-endian 16-bit integer at [off] - confirm. *) 44 + val get_u32_be : string -> int -> int (** NOTE(review): presumably [get_u32_be buf off] reads an unsigned big-endian 32-bit integer at [off] - confirm. *) 45 + val set_u16_be : bytes -> int -> int -> unit (** NOTE(review): presumably [set_u16_be buf off v] writes [v] as a big-endian 16-bit integer at [off] - confirm. *) 46 + val set_u32_be : bytes -> int -> int -> unit (** NOTE(review): presumably [set_u32_be buf off v] writes [v] as a big-endian 32-bit integer at [off] - confirm. *) 47 + 48 + (** {1 Cell operations} *) 49 + 50 + val write_cell : bytes -> cell_content_start:int -> cell:string -> int 51 + (** [write_cell buf ~cell_content_start ~cell] writes a cell and returns the new 52 + cell content start. 
*) 53 + 54 + val cell_pointers : string -> int -> header -> int array 55 + (** [cell_pointers page header_offset header] returns cell pointer array. *) 56 + 57 + val insert_cell_pointer : 58 + bytes -> 59 + header_offset:int -> 60 + page_type:page_type -> 61 + cell_count:int -> 62 + index:int -> 63 + ptr:int -> 64 + unit 65 + (** [insert_cell_pointer buf ~header_offset ~page_type ~cell_count ~index ~ptr] 66 + inserts a cell pointer at [index], shifting others right. *)
+30
lib/pager.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Page cache and file I/O for B-tree storage. *) 7 + 8 + type t 9 + (** A pager manages page-level I/O with caching. *) 10 + 11 + val create : page_size:int -> Eio.File.rw_ty Eio.Resource.t -> t 12 + (** [create ~page_size file] creates a pager with the given page size. *) 13 + 14 + val page_size : t -> int 15 + (** [page_size t] returns the page size. *) 16 + 17 + val page_count : t -> int 18 + (** [page_count t] returns the number of pages in the file. *) 19 + 20 + val read : t -> int -> string 21 + (** [read t page_num] reads page [page_num] (1-indexed). *) 22 + 23 + val write : t -> int -> string -> unit 24 + (** [write t page_num data] writes [data] to page [page_num]. NOTE(review): presumably [data] must be exactly [page_size t] bytes, and the write may stay in the cache until [sync] - confirm. *) 25 + 26 + val allocate : t -> int 27 + (** [allocate t] allocates a new page and returns its number. *) 28 + 29 + val sync : t -> unit 30 + (** [sync t] syncs all dirty pages to disk. *)
+42
lib/record.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** SQLite record format encoding and decoding. *) 7 + 8 + (** Serial types determine how column values are stored. *) 9 + type serial_type = 10 + | Null 11 + | Int8 12 + | Int16 13 + | Int24 14 + | Int32 15 + | Int48 16 + | Int64 17 + | Float64 18 + | Zero 19 + | One 20 + | Blob of int 21 + | Text of int 22 + 23 + (** Column values. *) 24 + type value = 25 + | Vnull 26 + | Vint of int64 27 + | Vfloat of float 28 + | Vblob of string 29 + | Vtext of string 30 + 31 + val serial_type_of_int : int -> serial_type (** [serial_type_of_int n] maps an [int] to a [serial_type]. NOTE(review): presumably [n] is a SQLite serial type code and the [int] carried by [Blob] and [Text] is a byte length - confirm. *) 32 + val serial_type_of_value : value -> int * int (** [serial_type_of_value v] returns a pair of [int]s, not a [serial_type]. NOTE(review): presumably (serial type code, encoded byte length) - confirm. *) 33 + val decode_int : string -> int -> int -> int64 (** NOTE(review): presumably [decode_int buf off len] reads a [len]-byte integer at [off] - confirm argument order and signedness. *) 34 + val encode_int : bytes -> int -> int64 -> int -> unit (** NOTE(review): presumably [encode_int buf off v len] writes [v] as a [len]-byte integer at [off] - confirm argument order. *) 35 + 36 + val decode : string -> value list 37 + (** [decode payload] decodes a record from its payload bytes. *) 38 + 39 + val encode : value list -> string 40 + (** [encode values] encodes values as a record. *) 41 + 42 + val pp_value : Format.formatter -> value -> unit (** [pp_value ppf v] is a [Format] printer for [value]s. *)
+33
lib/table.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Table B-tree for SQLite row storage. *) 7 + 8 + type t 9 + (** A table B-tree keyed by rowid. *) 10 + 11 + val create : Pager.t -> t 12 + (** [create pager] creates a new empty table B-tree. *) 13 + 14 + val open_ : Pager.t -> root_page:int -> t 15 + (** [open_ pager ~root_page] opens an existing table B-tree. *) 16 + 17 + val root_page : t -> int 18 + (** [root_page t] returns the root page number. *) 19 + 20 + val find : t -> int64 -> string option 21 + (** [find t rowid] finds the record with the given rowid. NOTE(review): presumably [None] when no such rowid exists, given the [option] result - confirm. *) 22 + 23 + val insert : t -> rowid:int64 -> string -> unit 24 + (** [insert t ~rowid data] inserts or updates a record. *) 25 + 26 + val delete : t -> int64 -> unit 27 + (** [delete t rowid] deletes the record with the given rowid. NOTE(review): behaviour when [rowid] is absent is not stated here - confirm. *) 28 + 29 + val iter : t -> (int64 -> string -> unit) -> unit 30 + (** [iter t f] calls [f rowid data] for each record in order. NOTE(review): order is presumably ascending rowid - confirm. *) 31 + 32 + val fold : t -> init:'a -> f:(int64 -> string -> 'a -> 'a) -> 'a 33 + (** [fold t ~init ~f] folds over all records in order. *)
+16
lib/varint.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Thomas Gazagnaire. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** SQLite-style variable-length integer encoding. *) 7 + 8 + val decode : string -> int -> int64 * int 9 + (** [decode buf off] decodes a varint starting at [off] in [buf]. Returns 10 + [(value, bytes_consumed)]. NOTE(review): behaviour on truncated input or an out-of-range [off] is not stated here - confirm. *) 11 + 12 + val encode : int64 -> string 13 + (** [encode n] encodes [n] as a varint. *) 14 + 15 + val size : int64 -> int 16 + (** [size n] returns the number of bytes needed to encode [n]. NOTE(review): presumably between 1 and 9, as for SQLite varints - confirm. *)
+1
test/test_cell.mli
··· 1 + val suite : string * unit Alcotest.test_case list (** [suite] pairs a string with a list of Alcotest test cases. NOTE(review): the string is presumably the suite name passed to the Alcotest runner - confirm. *)
+1
test/test_index.mli
··· 1 + val suite : string * unit Alcotest.test_case list (** [suite] pairs a string with a list of Alcotest test cases. NOTE(review): the string is presumably the suite name passed to the Alcotest runner - confirm. *)
+1
test/test_page.mli
··· 1 + val suite : string * unit Alcotest.test_case list (** [suite] pairs a string with a list of Alcotest test cases. NOTE(review): the string is presumably the suite name passed to the Alcotest runner - confirm. *)
+1
test/test_pager.mli
··· 1 + val suite : string * unit Alcotest.test_case list (** [suite] pairs a string with a list of Alcotest test cases. NOTE(review): the string is presumably the suite name passed to the Alcotest runner - confirm. *)
+1
test/test_record.mli
··· 1 + val suite : string * unit Alcotest.test_case list (** [suite] pairs a string with a list of Alcotest test cases. NOTE(review): the string is presumably the suite name passed to the Alcotest runner - confirm. *)
+1
test/test_table.mli
··· 1 + val suite : string * unit Alcotest.test_case list (** [suite] pairs a string with a list of Alcotest test cases. NOTE(review): the string is presumably the suite name passed to the Alcotest runner - confirm. *)
+1
test/test_varint.mli
··· 1 + val suite : string * unit Alcotest.test_case list (** [suite] pairs a string with a list of Alcotest test cases. NOTE(review): the string is presumably the suite name passed to the Alcotest runner - confirm. *)