Pure OCaml B-tree implementation for persistent storage
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat(btree): refactor index and table, expand tests

+253 -70
+87 -52
lib/index.ml
··· 145 145 Bytes.unsafe_to_string cell 146 146 end 147 147 148 - (* Encode an index interior cell *) 149 - let encode_index_interior_cell ~left_child ~payload = 150 - let payload_size_varint = 151 - Varint.encode (Int64.of_int (String.length payload)) 152 - in 153 - let cell = 154 - Bytes.create (4 + String.length payload_size_varint + String.length payload) 155 - in 156 - Page.set_u32_be cell 0 left_child; 157 - Bytes.blit_string payload_size_varint 0 cell 4 158 - (String.length payload_size_varint); 159 - Bytes.blit_string payload 0 cell 160 - (4 + String.length payload_size_varint) 161 - (String.length payload); 162 - Bytes.unsafe_to_string cell 148 + (* Encode an index interior cell - handles overflow for large payloads *) 149 + let encode_interior_cell_overflow t ~left_child ~payload = 150 + let payload_size = String.length payload in 151 + let usable_size = usable_size t in 152 + let max_local = Cell.max_local ~usable_size ~is_table:false in 153 + let min_local = Cell.min_local ~usable_size in 154 + let payload_size_varint = Varint.encode (Int64.of_int payload_size) in 155 + let varint_len = String.length payload_size_varint in 156 + if payload_size <= max_local then begin 157 + let cell = Bytes.create (4 + varint_len + payload_size) in 158 + Page.set_u32_be cell 0 left_child; 159 + Bytes.blit_string payload_size_varint 0 cell 4 varint_len; 160 + Bytes.blit_string payload 0 cell (4 + varint_len) payload_size; 161 + Bytes.unsafe_to_string cell 162 + end 163 + else begin 164 + let k = min_local + ((payload_size - min_local) mod (usable_size - 4)) in 165 + let local_size = if k <= max_local then k else min_local in 166 + let overflow_page = 167 + write_overflow_chain t.pager payload ~offset:local_size 168 + in 169 + let cell = Bytes.create (4 + varint_len + local_size + 4) in 170 + Page.set_u32_be cell 0 left_child; 171 + Bytes.blit_string payload_size_varint 0 cell 4 varint_len; 172 + Bytes.blit_string payload 0 cell (4 + varint_len) local_size; 173 + Page.set_u32_be cell (4 + varint_len + local_size) overflow_page; 174 + Bytes.unsafe_to_string cell 175 + end 163 176 164 177 (* Write a cell into a page buffer, returns new cell_content_start *) 165 178 let write_cell buf ~cell_content_start ~cell = ··· 206 219 if lo > hi then false 207 220 else 208 221 let mid = (lo + hi) / 2 in 209 - let cell, _ = 210 - Cell.parse_index_leaf page ptrs.(mid) ~usable_size:usable 222 + let full_payload = 223 + read_full_payload t page ptrs.(mid) ~usable_size:usable 211 224 in 212 - let cmp = String.compare key cell.Cell.payload in 225 + let cmp = String.compare key full_payload in 213 226 if cmp = 0 then true 214 227 else if cmp < 0 then search lo (mid - 1) 215 228 else search (mid + 1) hi ··· 219 232 let rec find_child i = 220 233 if i >= header.Page.cell_count then Option.get header.Page.right_child 221 234 else 235 + let full_payload = 236 + read_full_interior_payload t page ptrs.(i) ~usable_size:usable 237 + in 222 238 let cell, _ = 223 239 Cell.parse_index_interior page ptrs.(i) ~usable_size:usable 224 240 in 225 - if key_less_than key cell.Cell.payload then cell.Cell.left_child 241 + if key_less_than key full_payload then cell.Cell.left_child 226 242 else find_child (i + 1) 227 243 in 228 244 mem_in_page t (find_child 0) key ··· 337 353 in 338 354 loop 0 339 355 340 - (* Recompute cell_content_start after truncating a page to [count] cells *) 341 - let recompute_cell_content_start buf ~ptrs ~count ~page_size = 342 - if count > 0 then begin 343 - let min_ptr = ref page_size in 344 - for i = 0 to count - 1 do 345 - if ptrs.(i) < !min_ptr then min_ptr := ptrs.(i) 346 - done; 347 - Page.set_u16_be buf 5 !min_ptr 348 - end 349 - else Page.set_u16_be buf 5 page_size 350 - 351 356 (* Find insertion position for a key among interior cells *) 352 - let insert_pos page ptrs ~cell_count ~usable_size ~key = 357 + let insert_pos t page ptrs ~cell_count ~usable_size ~key = 353 358 let rec find i = 354 359 if i >= cell_count then i 355 360 else 356 - let c, _ = Cell.parse_index_interior page ptrs.(i) ~usable_size in 357 - if key < c.Cell.payload then i else find (i + 1) 361 + let full_payload = 362 + read_full_interior_payload t page ptrs.(i) ~usable_size 363 + in 364 + if key < full_payload then i else find (i + 1) 358 365 in 359 366 find 0 360 367 ··· 425 432 Page.set_u16_be new_buf 5 !new_cell_content_start; 426 433 Pager.write t.pager new_page_num (Bytes.unsafe_to_string new_buf); 427 434 428 - (* Update original page to only have cells [0..split_idx-1] *) 429 - let old_buf = Bytes.of_string page in 435 + (* Compact original page: repack remaining cells [0..split_idx-1] *) 436 + let old_buf = Page.init ~page_size ~kind:Page.Leaf_index in 437 + let old_cell_start = ref page_size in 438 + for i = 0 to split_idx - 1 do 439 + let _payload_size, _local, _overflow, cell_size = 440 + Cell.parse_index_leaf_raw page ptrs.(i) ~usable_size:usable 441 + in 442 + let raw_cell = String.sub page ptrs.(i) cell_size in 443 + old_cell_start := 444 + write_cell old_buf ~cell_content_start:!old_cell_start ~cell:raw_cell; 445 + let ptr_off = Page.header_size Page.Leaf_index + (i * 2) in 446 + Page.set_u16_be old_buf ptr_off !old_cell_start 447 + done; 430 448 Page.set_u16_be old_buf 3 split_idx; 431 - recompute_cell_content_start old_buf ~ptrs ~count:split_idx ~page_size; 449 + Page.set_u16_be old_buf 5 !old_cell_start; 432 450 Pager.write t.pager page_num (Bytes.unsafe_to_string old_buf); 433 451 434 452 { new_page = new_page_num; separator_key } ··· 446 464 let sep_cell, _ = 447 465 Cell.parse_index_interior page ptrs.(split_idx) ~usable_size:usable 448 466 in 449 - let separator_key = sep_cell.Cell.payload in 467 + let separator_key = 468 + read_full_interior_payload t page ptrs.(split_idx) ~usable_size:usable 469 + in 450 470 451 471 (* Create new right page *) 452 472 let new_page_num = Pager.allocate t.pager in 453 473 let new_buf = Page.init ~page_size ~kind:Page.Interior_index in 454 474 455 - (* Cells [split_idx+1..cell_count-1] go to new page *) 475 + (* Cells [split_idx+1..cell_count-1] go to new page (raw copy preserves overflow) *) 456 476 let new_cell_content_start = ref page_size in 457 477 for i = split_idx + 1 to header.Page.cell_count - 1 do 458 478 let cell_off = ptrs.(i) in 459 - let cell, _ = Cell.parse_index_interior page cell_off ~usable_size:usable in 460 - let cell_data = 461 - encode_index_interior_cell ~left_child:cell.Cell.left_child 462 - ~payload:cell.Cell.payload 479 + let _left, _payload_size, _local, _overflow, cell_size = 480 + Cell.parse_index_interior_raw page cell_off ~usable_size:usable 463 481 in 482 + let raw_cell = String.sub page cell_off cell_size in 464 483 new_cell_content_start := 465 484 write_cell new_buf ~cell_content_start:!new_cell_content_start 466 - ~cell:cell_data; 485 + ~cell:raw_cell; 467 486 let new_idx = i - split_idx - 1 in 468 487 let ptr_off = Page.header_size Page.Interior_index + (new_idx * 2) in 469 488 Page.set_u16_be new_buf ptr_off !new_cell_content_start ··· 475 494 Page.set_u32_be new_buf 8 (Option.get header.Page.right_child); 476 495 Pager.write t.pager new_page_num (Bytes.unsafe_to_string new_buf); 477 496 478 - (* Update original page: keep cells [0..split_idx-1], right child = sep_cell.left_child *) 479 - let old_buf = Bytes.of_string page in 497 + (* Compact original page: repack remaining cells [0..split_idx-1] *) 498 + let old_buf = Page.init ~page_size ~kind:Page.Interior_index in 499 + let old_cell_start = ref page_size in 500 + for i = 0 to split_idx - 1 do 501 + let _left, _payload_size, _local, _overflow, cell_size = 502 + Cell.parse_index_interior_raw page ptrs.(i) ~usable_size:usable 503 + in 504 + let raw_cell = String.sub page ptrs.(i) cell_size in 505 + old_cell_start := 506 + write_cell old_buf ~cell_content_start:!old_cell_start ~cell:raw_cell; 507 + let ptr_off = Page.header_size Page.Interior_index + (i * 2) in 508 + Page.set_u16_be old_buf ptr_off !old_cell_start 509 + done; 480 510 Page.set_u16_be old_buf 3 split_idx; 511 + Page.set_u16_be old_buf 5 !old_cell_start; 481 512 Page.set_u32_be old_buf 8 sep_cell.Cell.left_child; 482 - recompute_cell_content_start old_buf ~ptrs ~count:split_idx ~page_size; 483 513 Pager.write t.pager page_num (Bytes.unsafe_to_string old_buf); 484 514 485 515 { new_page = new_page_num; separator_key } ··· 489 519 = 490 520 let page = Pager.read t.pager page_num in 491 521 let header = Page.parse_header page 0 in 492 - let cell = encode_index_interior_cell ~left_child ~payload:separator_key in 522 + let cell = 523 + encode_interior_cell_overflow t ~left_child ~payload:separator_key 524 + in 493 525 let cell_len = String.length cell in 494 526 let space_needed = cell_len + 2 in 495 527 (* cell + pointer *) ··· 500 532 let ptrs = cell_pointers page header in 501 533 let usable = usable_size t in 502 534 let insert_idx = 503 - insert_pos page ptrs ~cell_count:header.Page.cell_count 535 + insert_pos t page ptrs ~cell_count:header.Page.cell_count 504 536 ~usable_size:usable ~key:separator_key 505 537 in 506 538 ignore ··· 579 611 580 612 (* Single cell pointing to left page *) 581 613 let cell = 582 - encode_index_interior_cell ~left_child:left_page ~payload:separator_key 614 + encode_interior_cell_overflow t ~left_child:left_page 615 + ~payload:separator_key 583 616 in 584 617 let cell_start = write_cell buf ~cell_content_start:page_size ~cell in 585 618 Page.set_u16_be buf 5 cell_start; ··· 646 679 done; 647 680 648 681 (* Decrease cell count *) 649 - Page.set_u16_be buf 3 (header.Page.cell_count - 1); 682 + let new_count = header.Page.cell_count - 1 in 683 + Page.set_u16_be buf 3 new_count; 650 684 651 - (* Note: We don't reclaim space - it becomes fragmented. 652 - A proper implementation would compact or track free space. *) 685 + (* Reset cell_content_start when page becomes empty *) 686 + if new_count = 0 then Page.set_u16_be buf 5 (Pager.page_size t.pager); 687 + 653 688 Pager.write t.pager page_num (Bytes.unsafe_to_string buf) 654 689 end 655 690
+26 -17
lib/table.ml
··· 132 132 133 133 let find t rowid = search_page t t.root_page rowid 134 134 135 - (* Recompute cell_content_start after truncating a page to [count] cells *) 136 - let recompute_cell_content_start buf ~ptrs ~count ~page_size = 137 - if count > 0 then begin 138 - let min_ptr = ref page_size in 139 - for i = 0 to count - 1 do 140 - if ptrs.(i) < !min_ptr then min_ptr := ptrs.(i) 141 - done; 142 - Page.set_u16_be buf 5 !min_ptr 143 - end 144 - else Page.set_u16_be buf 5 page_size 145 - 146 135 (* Find insertion position for a rowid among interior cells *) 147 136 let insert_pos page ptrs ~cell_count ~rowid = 148 137 let rec find i = ··· 224 213 Page.set_u16_be new_buf 5 !new_cell_content_start; 225 214 Pager.write t.pager new_page_num (Bytes.unsafe_to_string new_buf); 226 215 227 - (* Update original page to only have cells [0..split_idx-1] *) 228 - let old_buf = Bytes.of_string page in 216 + (* Compact original page: repack remaining cells [0..split_idx-1] *) 217 + let old_buf = Page.init ~page_size ~kind:Page.Leaf_table in 218 + let old_cell_start = ref page_size in 219 + for i = 0 to split_idx - 1 do 220 + let _cell, cell_size = 221 + Cell.parse_table_leaf page ptrs.(i) ~usable_size:usable 222 + in 223 + let raw_cell = String.sub page ptrs.(i) cell_size in 224 + old_cell_start := 225 + write_cell old_buf ~cell_content_start:!old_cell_start ~cell:raw_cell; 226 + let ptr_off = Page.header_size Page.Leaf_table + (i * 2) in 227 + Page.set_u16_be old_buf ptr_off !old_cell_start 228 + done; 229 229 Page.set_u16_be old_buf 3 split_idx; 230 - recompute_cell_content_start old_buf ~ptrs ~count:split_idx ~page_size; 230 + Page.set_u16_be old_buf 5 !old_cell_start; 231 231 Pager.write t.pager page_num (Bytes.unsafe_to_string old_buf); 232 232 233 233 { new_page = new_page_num; separator_rowid } ··· 272 272 Page.set_u32_be new_buf 8 (Option.get header.Page.right_child); 273 273 Pager.write t.pager new_page_num (Bytes.unsafe_to_string new_buf); 274 274 275 - (* Update original page: keep cells [0..split_idx-1], right child = sep_cell.left_child *) 276 - let old_buf = Bytes.of_string page in 275 + (* Compact original page: repack remaining cells [0..split_idx-1] *) 276 + let old_buf = Page.init ~page_size ~kind:Page.Interior_table in 277 + let old_cell_start = ref page_size in 278 + for i = 0 to split_idx - 1 do 279 + let _cell, cell_size = Cell.parse_table_interior page ptrs.(i) in 280 + let raw_cell = String.sub page ptrs.(i) cell_size in 281 + old_cell_start := 282 + write_cell old_buf ~cell_content_start:!old_cell_start ~cell:raw_cell; 283 + let ptr_off = Page.header_size Page.Interior_table + (i * 2) in 284 + Page.set_u16_be old_buf ptr_off !old_cell_start 285 + done; 277 286 Page.set_u16_be old_buf 3 split_idx; 287 + Page.set_u16_be old_buf 5 !old_cell_start; 278 288 Page.set_u32_be old_buf 8 sep_cell.Cell.left_child; 279 - recompute_cell_content_start old_buf ~ptrs ~count:split_idx ~page_size; 280 289 Pager.write t.pager page_num (Bytes.unsafe_to_string old_buf); 281 290 282 291 { new_page = new_page_num; separator_rowid }
+140 -1
test/test_index.ml
··· 1 - let suite = ("index", []) 1 + let with_mem_pager ?(page_size = 4096) f = 2 + let pager = Btree.Pager.mem ~page_size () in 3 + f pager 4 + 5 + let test_insert_find () = 6 + with_mem_pager @@ fun pager -> 7 + let idx = Btree.Index.v pager in 8 + Btree.Index.insert idx "hello"; 9 + Btree.Index.insert idx "world"; 10 + Alcotest.(check bool) "mem hello" true (Btree.Index.mem idx "hello"); 11 + Alcotest.(check bool) "mem world" true (Btree.Index.mem idx "world"); 12 + Alcotest.(check bool) "no missing" false (Btree.Index.mem idx "missing") 13 + 14 + let test_many_short_keys () = 15 + with_mem_pager ~page_size:512 @@ fun pager -> 16 + let idx = Btree.Index.v pager in 17 + let n = 200 in 18 + for i = 0 to n - 1 do 19 + Btree.Index.insert idx (Fmt.str "key_%04d" i) 20 + done; 21 + for i = 0 to n - 1 do 22 + let key = Fmt.str "key_%04d" i in 23 + Alcotest.(check bool) (Fmt.str "mem %s" key) true (Btree.Index.mem idx key) 24 + done; 25 + let count = ref 0 in 26 + Btree.Index.iter idx (fun _ -> incr count); 27 + Alcotest.(check int) "count" n !count 28 + 29 + (* This test reproduces the page overflow bug: long keys on a 4096-byte page 30 + cause splits where dead cells in the original page waste space, leaving 31 + insufficient room for the new cell. *) 32 + let test_long_keys_page_overflow () = 33 + with_mem_pager @@ fun pager -> 34 + let idx = Btree.Index.v pager in 35 + let n = 100 in 36 + for i = 0 to n - 1 do 37 + (* Keys similar to ATProto MST entries: ~80 bytes each *) 38 + let key = Fmt.str "blocks\x00bafyreig%064d\x00value_data_%d" i i in 39 + Btree.Index.insert idx key 40 + done; 41 + for i = 0 to n - 1 do 42 + let key = Fmt.str "blocks\x00bafyreig%064d\x00value_data_%d" i i in 43 + Alcotest.(check bool) (Fmt.str "mem %d" i) true (Btree.Index.mem idx key) 44 + done; 45 + let count = ref 0 in 46 + Btree.Index.iter idx (fun _ -> incr count); 47 + Alcotest.(check int) "count" n !count 48 + 49 + let test_reverse_order () = 50 + with_mem_pager ~page_size:512 @@ fun pager -> 51 + let idx = Btree.Index.v pager in 52 + let n = 150 in 53 + for i = n - 1 downto 0 do 54 + Btree.Index.insert idx (Fmt.str "r%04d" i) 55 + done; 56 + for i = 0 to n - 1 do 57 + let key = Fmt.str "r%04d" i in 58 + Alcotest.(check bool) key true (Btree.Index.mem idx key) 59 + done 60 + 61 + let test_random_order () = 62 + with_mem_pager ~page_size:512 @@ fun pager -> 63 + let idx = Btree.Index.v pager in 64 + let n = 200 in 65 + let keys = Array.init n (fun i -> Fmt.str "k%04d" i) in 66 + (* LCG shuffle *) 67 + let state = ref 42 in 68 + for i = n - 1 downto 1 do 69 + state := ((!state * 1103515245) + 12345) land 0x7fffffff; 70 + let j = !state mod (i + 1) in 71 + let tmp = keys.(i) in 72 + keys.(i) <- keys.(j); 73 + keys.(j) <- tmp 74 + done; 75 + Array.iter (fun k -> Btree.Index.insert idx k) keys; 76 + for i = 0 to n - 1 do 77 + let key = Fmt.str "k%04d" i in 78 + Alcotest.(check bool) key true (Btree.Index.mem idx key) 79 + done 80 + 81 + let test_iter_sorted () = 82 + with_mem_pager ~page_size:512 @@ fun pager -> 83 + let idx = Btree.Index.v pager in 84 + let n = 100 in 85 + for i = n - 1 downto 0 do 86 + Btree.Index.insert idx (Fmt.str "%04d" i) 87 + done; 88 + let prev = ref "" in 89 + Btree.Index.iter idx (fun key -> 90 + if key <= !prev then Alcotest.failf "out of order: %S after %S" key !prev; 91 + prev := key) 92 + 93 + let test_delete () = 94 + with_mem_pager @@ fun pager -> 95 + let idx = Btree.Index.v pager in 96 + for i = 0 to 19 do 97 + Btree.Index.insert idx (Fmt.str "key_%02d" i) 98 + done; 99 + Btree.Index.delete idx "key_10"; 100 + Alcotest.(check bool) "deleted" false (Btree.Index.mem idx "key_10"); 101 + Alcotest.(check bool) "kept" true (Btree.Index.mem idx "key_09") 102 + 103 + let test_prefix_search () = 104 + with_mem_pager @@ fun pager -> 105 + let idx = Btree.Index.v pager in 106 + Btree.Index.insert idx "app.bsky.feed.post/abc"; 107 + Btree.Index.insert idx "app.bsky.feed.post/def"; 108 + Btree.Index.insert idx "app.bsky.actor.profile/self"; 109 + let result = Btree.Index.by_prefix idx "app.bsky.feed.post/" in 110 + Alcotest.(check bool) "found prefix" true (Option.is_some result) 111 + 112 + let test_overflow_cells () = 113 + (* Keys large enough to require overflow pages *) 114 + with_mem_pager @@ fun pager -> 115 + let idx = Btree.Index.v pager in 116 + let n = 20 in 117 + for i = 0 to n - 1 do 118 + let key = Fmt.str "%04d%s" i (String.make 2000 'x') in 119 + Btree.Index.insert idx key 120 + done; 121 + for i = 0 to n - 1 do 122 + let key = Fmt.str "%04d%s" i (String.make 2000 'x') in 123 + Alcotest.(check bool) 124 + (Fmt.str "overflow %d" i) true (Btree.Index.mem idx key) 125 + done 126 + 127 + let suite = 128 + ( "index", 129 + [ 130 + Alcotest.test_case "insert/find" `Quick test_insert_find; 131 + Alcotest.test_case "many short keys" `Quick test_many_short_keys; 132 + Alcotest.test_case "long keys (page overflow)" `Quick 133 + test_long_keys_page_overflow; 134 + Alcotest.test_case "reverse order" `Quick test_reverse_order; 135 + Alcotest.test_case "random order" `Quick test_random_order; 136 + Alcotest.test_case "iter sorted" `Quick test_iter_sorted; 137 + Alcotest.test_case "delete" `Quick test_delete; 138 + Alcotest.test_case "prefix search" `Quick test_prefix_search; 139 + Alcotest.test_case "overflow cells" `Quick test_overflow_cells; 140 + ] )