Pure OCaml B-tree implementation for persistent storage
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix(btree): implement overflow pages per SQLite spec

Cell payloads exceeding max_local (U-35 for table leaves) now spill
to linked overflow pages. The read path reassembles the full payload
by following the overflow chain. This fixes crashes when large cells
(>50% of page) caused splits with insufficient room.

Key changes:
- encode_table_leaf_cell: writes overflow chain when P > max_local
- full_payload: reads overflow chain to reassemble data
- Cell.table_leaf: exposes payload_size for reassembly
- insert_into_leaf: handles degenerate single-cell splits

56 tests including SQLite spec vectors:
- Overflow threshold formulas (X, M, K) for all page sizes
- Boundary tests at exact max_local (4061 bytes for 4096 pages)
- Multi-page overflow chains
- All valid page sizes (512..65536)
- Record serial type codes, varint 9-byte encoding
- Stress test: 50 cells of increasing size with overflow + splits

+505 -32
+2 -1
lib/btree.mli
··· 87 87 type table_leaf = { 88 88 rowid : int64; 89 89 payload : string; 90 + payload_size : int; 90 91 overflow_page : int option; 91 92 } 92 - (** Table leaf cell: rowid + payload *) 93 + (** Table leaf cell: rowid + payload (local portion) + total payload size *) 93 94 94 95 type table_interior = { left_child : int; rowid : int64 } 95 96 (** Table interior cell: child page + rowid *)
+2 -1
lib/cell.ml
··· 8 8 type table_leaf = { 9 9 rowid : int64; 10 10 payload : string; 11 + payload_size : int; 11 12 overflow_page : int option; 12 13 } 13 14 ··· 45 46 let total_consumed = 46 47 header_len + local_size + if overflow_page = None then 0 else 4 47 48 in 48 - ({ rowid; payload; overflow_page }, total_consumed) 49 + ({ rowid; payload; payload_size; overflow_page }, total_consumed) 49 50 50 51 let parse_table_interior buf off = 51 52 let left_child = Page.u32_be buf off in
+2 -1
lib/cell.mli
··· 8 8 type table_leaf = { 9 9 rowid : int64; 10 10 payload : string; 11 + payload_size : int; 11 12 overflow_page : int option; 12 13 } 13 - (** Table leaf cell: rowid + payload. *) 14 + (** Table leaf cell: rowid + payload (local portion) + total payload size. *) 14 15 15 16 type table_interior = { left_child : int; rowid : int64 } 16 17 (** Table interior cell: child page + rowid. *)
+129 -29
lib/table.ml
··· 24 24 let ptr_area_end = header_size + (header.Page.cell_count * 2) in 25 25 header.Page.cell_content_start - ptr_area_end - header.Page.fragmented_bytes 26 26 27 - (* Encode a table leaf cell *) 28 - let encode_table_leaf_cell ~rowid ~data = 27 + (* Read the full payload for a table leaf cell, following overflow chain *) 28 + let full_payload t (cell : Cell.table_leaf) = 29 + match cell.overflow_page with 30 + | None -> cell.payload 31 + | Some first_overflow -> 32 + let usable = usable_size t in 33 + let remaining = cell.payload_size - String.length cell.payload in 34 + let buf = Buffer.create cell.payload_size in 35 + Buffer.add_string buf cell.payload; 36 + let overflow_payload_size = usable - 4 in 37 + let rec follow page_num left = 38 + if left <= 0 then () 39 + else begin 40 + let page = Pager.read t.pager page_num in 41 + let next = Page.u32_be page 0 in 42 + let chunk = min left overflow_payload_size in 43 + Buffer.add_string buf (String.sub page 4 chunk); 44 + if next <> 0 then follow next (left - chunk) 45 + end 46 + in 47 + follow first_overflow remaining; 48 + Buffer.contents buf 49 + 50 + (* Encode a table leaf cell, spilling to overflow pages as needed *) 51 + let encode_table_leaf_cell ~pager ~rowid ~data = 52 + let usable = Pager.page_size pager in 53 + let payload_size = String.length data in 29 54 let rowid_varint = Varint.encode rowid in 30 - let payload_size_varint = Varint.encode (Int64.of_int (String.length data)) in 31 - let cell = 32 - Bytes.create 33 - (String.length payload_size_varint 34 - + String.length rowid_varint + String.length data) 55 + let payload_size_varint = Varint.encode (Int64.of_int payload_size) in 56 + let header_len = 57 + String.length payload_size_varint + String.length rowid_varint 35 58 in 36 - Bytes.blit_string payload_size_varint 0 cell 0 37 - (String.length payload_size_varint); 38 - Bytes.blit_string rowid_varint 0 cell 39 - (String.length payload_size_varint) 40 - (String.length rowid_varint); 41 - Bytes.blit_string data 0 cell 42 - (String.length payload_size_varint + String.length rowid_varint) 43 - (String.length data); 44 - Bytes.unsafe_to_string cell 59 + let max_local = usable - 35 in 60 + let min_local = ((usable - 12) * 32 / 255) - 23 in 61 + if payload_size <= max_local then begin 62 + (* Fits inline - no overflow needed *) 63 + let cell = Bytes.create (header_len + payload_size) in 64 + Bytes.blit_string payload_size_varint 0 cell 0 65 + (String.length payload_size_varint); 66 + Bytes.blit_string rowid_varint 0 cell 67 + (String.length payload_size_varint) 68 + (String.length rowid_varint); 69 + Bytes.blit_string data 0 cell header_len payload_size; 70 + Bytes.unsafe_to_string cell 71 + end 72 + else begin 73 + (* Need overflow pages *) 74 + let k = min_local + ((payload_size - min_local) mod (usable - 4)) in 75 + let local = if k <= max_local then k else min_local in 76 + let overflow_payload_size = usable - 4 in 77 + (* Write overflow chain *) 78 + let remaining = payload_size - local in 79 + let num_overflow_pages = 80 + (remaining + overflow_payload_size - 1) / overflow_payload_size 81 + in 82 + (* Allocate all overflow pages first so we know the page numbers *) 83 + let overflow_pages = 84 + Array.init num_overflow_pages (fun _ -> Pager.allocate pager) 85 + in 86 + (* Write data to overflow pages *) 87 + let data_off = ref local in 88 + for i = 0 to num_overflow_pages - 1 do 89 + let page_buf = Bytes.create usable in 90 + let next = 91 + if i < num_overflow_pages - 1 then overflow_pages.(i + 1) else 0 92 + in 93 + Page.set_u32_be page_buf 0 next; 94 + let chunk = min (payload_size - !data_off) overflow_payload_size in 95 + Bytes.blit_string data !data_off page_buf 4 chunk; 96 + data_off := !data_off + chunk; 97 + Pager.write pager overflow_pages.(i) (Bytes.unsafe_to_string page_buf) 98 + done; 99 + (* Build cell: header + local payload + 4-byte overflow pointer *) 100 + let cell = Bytes.create (header_len + local + 4) in 101 + Bytes.blit_string payload_size_varint 0 cell 0 102 + (String.length payload_size_varint); 103 + Bytes.blit_string rowid_varint 0 cell 104 + (String.length payload_size_varint) 105 + (String.length rowid_varint); 106 + Bytes.blit_string data 0 cell header_len local; 107 + Page.set_u32_be cell (header_len + local) overflow_pages.(0); 108 + Bytes.unsafe_to_string cell 109 + end 45 110 46 111 (* Encode a table interior cell *) 47 112 let encode_table_interior_cell ~left_child ~rowid = ··· 79 144 else 80 145 let mid = (lo + hi) / 2 in 81 146 let cell, _ = Cell.parse_table_leaf page ptrs.(mid) ~usable_size:usable in 82 - if cell.Cell.rowid = rowid then Some cell.Cell.payload 147 + if cell.Cell.rowid = rowid then Some (full_payload t cell) 83 148 else if cell.Cell.rowid < rowid then loop (mid + 1) hi 84 149 else loop lo (mid - 1) 85 150 in ··· 194 259 Cell.parse_table_leaf page ptrs.(split_idx) ~usable_size:usable 195 260 in 196 261 let separator_rowid = sep_cell.Cell.rowid in 197 - (* Right page: re-encode cells *) 262 + (* Right page: re-encode cells using full payload (follows overflow) *) 198 263 let new_page_num = Pager.allocate t.pager in 199 264 let new_buf = 200 265 pack_cells ~page_size ~kind:Page.Leaf_table ~start:split_idx 201 266 ~stop:header.Page.cell_count ~copy_cell:(fun i -> 202 267 let cell, _ = Cell.parse_table_leaf page ptrs.(i) ~usable_size:usable in 203 - encode_table_leaf_cell ~rowid:cell.Cell.rowid ~data:cell.Cell.payload) 268 + let data = full_payload t cell in 269 + encode_table_leaf_cell ~pager:t.pager ~rowid:cell.Cell.rowid ~data) 204 270 in 205 271 Pager.write t.pager new_page_num (Bytes.unsafe_to_string new_buf); 206 - (* Left page: raw-copy remaining cells *) 272 + (* Left page: raw-copy remaining cells (preserves overflow pointers) *) 207 273 let old_buf = 208 274 pack_cells ~page_size ~kind:Page.Leaf_table ~start:0 ~stop:split_idx 209 275 ~copy_cell:(fun i -> ··· 294 360 end 295 361 296 362 (* Insert into a leaf page, potentially splitting *) 297 - let rec insert_into_leaf t page_num ~rowid ~data ~parent_stack = 363 + and insert_into_leaf t page_num ~rowid ~data ~parent_stack = 298 364 let page = Pager.read t.pager page_num in 299 365 let header = Page.parse_header page 0 in 300 - let cell = encode_table_leaf_cell ~rowid ~data in 366 + let cell = encode_table_leaf_cell ~pager:t.pager ~rowid ~data in 301 367 let cell_len = String.length cell in 302 368 let space_needed = cell_len + 2 in 303 369 (* cell + pointer *) ··· 311 377 ~cell); 312 378 Pager.write t.pager page_num (Bytes.unsafe_to_string buf) 313 379 end 314 - else begin 315 - (* Need to split *) 380 + else if header.Page.cell_count >= 2 then begin 381 + (* Normal split: page has enough cells to split meaningfully *) 316 382 let split = split_leaf t page_num in 317 - 318 - (* Propagate split up first so tree structure is consistent *) 319 383 propagate_split t ~parent_stack ~left_page:page_num 320 384 ~separator_rowid:split.separator_rowid ~right_page:split.new_page; 321 - 322 - (* Now insert via full tree traversal (handles cascading splits) *) 385 + (* After split, insert via full tree traversal *) 323 386 insert t ~rowid data 324 387 end 388 + else begin 389 + (* Degenerate case: page has 0 or 1 cells and the new cell doesn't fit 390 + alongside them. Allocate a fresh page for the new cell and propagate 391 + a separator up. *) 392 + let page_size = Pager.page_size t.pager in 393 + let new_page_num = Pager.allocate t.pager in 394 + let new_buf = Page.init ~page_size ~kind:Page.Leaf_table in 395 + let new_header = Page.parse_header (Bytes.unsafe_to_string new_buf) 0 in 396 + ignore 397 + (write_and_insert_cell new_buf ~header:new_header ~kind:Page.Leaf_table 398 + ~index:0 ~cell); 399 + Pager.write t.pager new_page_num (Bytes.unsafe_to_string new_buf); 400 + (* Determine separator: the existing page's last rowid separates the pages. 401 + We need left page keys < separator, right page keys >= separator. *) 402 + if header.Page.cell_count = 0 then 403 + (* Existing page is empty. New cell goes to new page. Separator = rowid. *) 404 + propagate_split t ~parent_stack ~left_page:page_num ~separator_rowid:rowid 405 + ~right_page:new_page_num 406 + else begin 407 + (* Existing page has 1 cell. Figure out which page should be left/right. *) 408 + let usable = usable_size t in 409 + let ptrs = Page.cell_pointers page 0 header in 410 + let existing_cell, _ = 411 + Cell.parse_table_leaf page ptrs.(0) ~usable_size:usable 412 + in 413 + if rowid > existing_cell.Cell.rowid then 414 + (* New cell (larger rowid) goes right, existing stays left. 415 + Separator = new cell's rowid so routing sends >= rowid to right. *) 416 + propagate_split t ~parent_stack ~left_page:page_num 417 + ~separator_rowid:rowid ~right_page:new_page_num 418 + else 419 + (* New cell (smaller rowid) goes left, existing stays right. 420 + Separator = existing cell's rowid. *) 421 + propagate_split t ~parent_stack ~left_page:new_page_num 422 + ~separator_rowid:existing_cell.Cell.rowid ~right_page:page_num 423 + end 424 + end 325 425 326 426 and propagate_split t ~parent_stack ~left_page ~separator_rowid ~right_page = 327 427 match parent_stack with ··· 427 527 let cell, _ = 428 528 Cell.parse_table_leaf page ptrs.(i) ~usable_size:usable 429 529 in 430 - f cell.Cell.rowid cell.Cell.payload 530 + f cell.Cell.rowid (full_payload t cell) 431 531 done 432 532 | Page.Interior_table -> 433 533 for i = 0 to header.Page.cell_count - 1 do
+370
test/test_btree.ml
··· 452 452 Alcotest.(check (option string)) (Fmt.str "find %d" i) expected r 453 453 done 454 454 455 + let test_overflow_write_read () = 456 + (* Insert cells larger than max_local (>4061 bytes for 4096 page size) *) 457 + with_temp_file @@ fun file -> 458 + let pager = Btree.Pager.v ~page_size:4096 file in 459 + let tree = Btree.Table.v pager in 460 + (* 5000 bytes > max_local=4061 for usable_size=4096 *) 461 + let data1 = String.init 5000 (fun i -> Char.chr (i mod 256)) in 462 + let data2 = String.make 8000 'Z' in 463 + let data3 = String.make 20000 'Q' in 464 + Btree.Table.insert tree ~rowid:1L data1; 465 + Btree.Table.insert tree ~rowid:2L data2; 466 + Btree.Table.insert tree ~rowid:3L data3; 467 + let r1 = Btree.Table.find tree 1L in 468 + Alcotest.(check (option string)) "overflow 5000" (Some data1) r1; 469 + let r2 = Btree.Table.find tree 2L in 470 + Alcotest.(check (option string)) "overflow 8000" (Some data2) r2; 471 + let r3 = Btree.Table.find tree 3L in 472 + Alcotest.(check (option string)) "overflow 20000" (Some data3) r3 473 + 474 + let test_overflow_with_splits () = 475 + (* Many large cells forcing overflow + multiple splits *) 476 + with_temp_file @@ fun file -> 477 + let pager = Btree.Pager.v ~page_size:4096 file in 478 + let tree = Btree.Table.v pager in 479 + let n = 30 in 480 + for i = 1 to n do 481 + let data = String.make 5000 (Char.chr (65 + (i mod 26))) in 482 + Btree.Table.insert tree ~rowid:(Int64.of_int i) data 483 + done; 484 + for i = 1 to n do 485 + let expected = String.make 5000 (Char.chr (65 + (i mod 26))) in 486 + let r = Btree.Table.find tree (Int64.of_int i) in 487 + Alcotest.(check (option string)) (Fmt.str "find %d" i) (Some expected) r 488 + done; 489 + let count = Btree.Table.fold tree ~init:0 ~f:(fun _ _ acc -> acc + 1) in 490 + Alcotest.(check int) "total count" n count 491 + 492 + let test_mixed_overflow_and_normal () = 493 + (* Mix of small (inline) and large (overflow) cells *) 494 + with_temp_file @@ fun file -> 495 + let pager = Btree.Pager.v ~page_size:4096 file in 496 + let tree = Btree.Table.v pager in 497 + let n = 40 in 498 + for i = 1 to n do 499 + let size = if i mod 3 = 0 then 6000 else 100 in 500 + let data = String.make size (Char.chr (65 + (i mod 26))) in 501 + Btree.Table.insert tree ~rowid:(Int64.of_int i) data 502 + done; 503 + for i = 1 to n do 504 + let size = if i mod 3 = 0 then 6000 else 100 in 505 + let expected = String.make size (Char.chr (65 + (i mod 26))) in 506 + let r = Btree.Table.find tree (Int64.of_int i) in 507 + Alcotest.(check (option string)) (Fmt.str "find %d" i) (Some expected) r 508 + done 509 + 510 + let test_overflow_iter () = 511 + (* Verify iter/fold returns full payloads for overflow cells *) 512 + with_temp_file @@ fun file -> 513 + let pager = Btree.Pager.v ~page_size:4096 file in 514 + let tree = Btree.Table.v pager in 515 + let n = 10 in 516 + for i = 1 to n do 517 + let data = String.make 5000 (Char.chr (65 + (i mod 26))) in 518 + Btree.Table.insert tree ~rowid:(Int64.of_int i) data 519 + done; 520 + let items = ref [] in 521 + Btree.Table.iter tree (fun rowid data -> items := (rowid, data) :: !items); 522 + let sorted = List.sort compare !items in 523 + Alcotest.(check int) "iter count" n (List.length sorted); 524 + List.iter 525 + (fun (rowid, data) -> 526 + let i = Int64.to_int rowid in 527 + let expected = String.make 5000 (Char.chr (65 + (i mod 26))) in 528 + Alcotest.(check string) (Fmt.str "iter data %d" i) expected data) 529 + sorted 530 + 531 + let test_overflow_delete () = 532 + (* Insert large cells, delete some, verify remaining *) 533 + with_temp_file @@ fun file -> 534 + let pager = Btree.Pager.v ~page_size:4096 file in 535 + let tree = Btree.Table.v pager in 536 + for i = 1 to 10 do 537 + let data = String.make 5000 (Char.chr (65 + (i mod 26))) in 538 + Btree.Table.insert tree ~rowid:(Int64.of_int i) data 539 + done; 540 + (* Delete even-numbered rows *) 541 + for i = 1 to 5 do 542 + Btree.Table.delete tree (Int64.of_int (i * 2)) 543 + done; 544 + for i = 1 to 10 do 545 + let expected = 546 + if i mod 2 = 0 then None 547 + else Some (String.make 5000 (Char.chr (65 + (i mod 26)))) 548 + in 549 + let r = Btree.Table.find tree (Int64.of_int i) in 550 + Alcotest.(check (option string)) (Fmt.str "find %d" i) expected r 551 + done 552 + 553 + let test_near_max_local_cells () = 554 + (* Cells of ~2000 bytes: under max_local but large enough to stress splits *) 555 + with_temp_file @@ fun file -> 556 + let pager = Btree.Pager.v ~page_size:4096 file in 557 + let tree = Btree.Table.v pager in 558 + let n = 30 in 559 + for i = 1 to n do 560 + let data = String.make 2000 (Char.chr (65 + (i mod 26))) in 561 + Btree.Table.insert tree ~rowid:(Int64.of_int i) data 562 + done; 563 + for i = 1 to n do 564 + let expected = String.make 2000 (Char.chr (65 + (i mod 26))) in 565 + let r = Btree.Table.find tree (Int64.of_int i) in 566 + Alcotest.(check (option string)) (Fmt.str "find %d" i) (Some expected) r 567 + done; 568 + let count = Btree.Table.fold tree ~init:0 ~f:(fun _ _ acc -> acc + 1) in 569 + Alcotest.(check int) "total count" n count 570 + 571 + (* ---- SQLite spec test vectors (https://www.sqlite.org/fileformat.html) ---- *) 572 + 573 + (* Section 1.5: B-tree page type flags — verify via init + parse roundtrip *) 574 + let test_page_type_flags () = 575 + let check_kind kind expected_name = 576 + let buf = Btree.Page.init ~page_size:512 ~kind in 577 + let hdr = Btree.Page.parse_header (Bytes.unsafe_to_string buf) 0 in 578 + Alcotest.(check bool) expected_name true (hdr.Btree.Page.kind = kind) 579 + in 580 + check_kind Btree.Page.Interior_index "interior index"; 581 + check_kind Btree.Page.Interior_table "interior table"; 582 + check_kind Btree.Page.Leaf_index "leaf index"; 583 + check_kind Btree.Page.Leaf_table "leaf table" 584 + 585 + (* Section 1.5: Header sizes *) 586 + let test_header_sizes () = 587 + Alcotest.(check int) 588 + "leaf header" 8 589 + (Btree.Page.header_size Btree.Page.Leaf_table); 590 + Alcotest.(check int) 591 + "leaf index header" 8 592 + (Btree.Page.header_size Btree.Page.Leaf_index); 593 + Alcotest.(check int) 594 + "interior header" 12 595 + (Btree.Page.header_size Btree.Page.Interior_table); 596 + Alcotest.(check int) 597 + "interior index header" 12 598 + (Btree.Page.header_size Btree.Page.Interior_index) 599 + 600 + (* Section 1.3.6: Overflow thresholds — exact formulas from spec: 601 + Table leaf: X = U - 35 602 + Index: X = ((U-12)*64/255) - 23 603 + Both: M = ((U-12)*32/255) - 23 604 + With U = usable page size *) 605 + let test_overflow_thresholds () = 606 + (* 4096-byte pages: X_table = 4061, M = 489, X_index = 1001 *) 607 + let u = 4096 in 608 + Alcotest.(check int) 609 + "max_local table U=4096" (u - 35) 610 + (Btree.Cell.max_local ~usable_size:u ~is_table:true); 611 + Alcotest.(check int) 612 + "max_local index U=4096" 613 + (((u - 12) * 64 / 255) - 23) 614 + (Btree.Cell.max_local ~usable_size:u ~is_table:false); 615 + Alcotest.(check int) 616 + "min_local U=4096" 617 + (((u - 12) * 32 / 255) - 23) 618 + (Btree.Cell.min_local ~usable_size:u); 619 + (* 512-byte pages: X_table = 477, M = 39, X_index = 104 *) 620 + let u = 512 in 621 + Alcotest.(check int) 622 + "max_local table U=512" (u - 35) 623 + (Btree.Cell.max_local ~usable_size:u ~is_table:true); 624 + Alcotest.(check int) 625 + "min_local U=512" 626 + (((u - 12) * 32 / 255) - 23) 627 + (Btree.Cell.min_local ~usable_size:u); 628 + (* 1024-byte pages *) 629 + let u = 1024 in 630 + Alcotest.(check int) 631 + "max_local table U=1024" (u - 35) 632 + (Btree.Cell.max_local ~usable_size:u ~is_table:true); 633 + Alcotest.(check int) 634 + "min_local U=1024" 635 + (((u - 12) * 32 / 255) - 23) 636 + (Btree.Cell.min_local ~usable_size:u); 637 + (* 65536-byte pages *) 638 + let u = 65536 in 639 + Alcotest.(check int) 640 + "max_local table U=65536" (u - 35) 641 + (Btree.Cell.max_local ~usable_size:u ~is_table:true); 642 + Alcotest.(check int) 643 + "min_local U=65536" 644 + (((u - 12) * 32 / 255) - 23) 645 + (Btree.Cell.min_local ~usable_size:u) 646 + 647 + (* Section 1.3.6: K computation for overflow boundary *) 648 + let test_overflow_k_computation () = 649 + (* For U=4096 table leaf: X=4061, M=489 650 + P=4062 (just over X): K = 489 + ((4062-489) % (4096-4)) = 489 + (3573 % 4092) 651 + = 489 + 3573 = 4062 <= 4061? No -> local = M = 489 *) 652 + (* P=5000: K = 489 + ((5000-489) % 4092) = 489 + (4511 % 4092) = 489 + 419 = 908 653 + 908 <= 4061 -> local = 908 *) 654 + with_temp_file @@ fun file -> 655 + let pager = Btree.Pager.v ~page_size:4096 file in 656 + let tree = Btree.Table.v pager in 657 + (* Insert payload of exactly 4061 bytes (= X, no overflow) *) 658 + let data_exact = String.make 4061 'A' in 659 + Btree.Table.insert tree ~rowid:1L data_exact; 660 + Alcotest.(check (option string)) 661 + "exact max_local" (Some data_exact) (Btree.Table.find tree 1L); 662 + (* Insert payload of 4062 bytes (= X+1, needs overflow) *) 663 + let data_over = String.make 4062 'B' in 664 + Btree.Table.insert tree ~rowid:2L data_over; 665 + Alcotest.(check (option string)) 666 + "X+1 overflow" (Some data_over) (Btree.Table.find tree 2L); 667 + (* Insert payload at overflow capacity boundary: exactly U-4 = 4092 overflow *) 668 + let data_one_overflow = String.make (4061 + 4092) 'C' in 669 + Btree.Table.insert tree ~rowid:3L data_one_overflow; 670 + Alcotest.(check (option string)) 671 + "one full overflow page" (Some data_one_overflow) (Btree.Table.find tree 3L); 672 + (* Multi-page overflow: 3 overflow pages worth *) 673 + let data_multi = String.make (4061 + (3 * 4092)) 'D' in 674 + Btree.Table.insert tree ~rowid:4L data_multi; 675 + Alcotest.(check (option string)) 676 + "multi overflow" (Some data_multi) (Btree.Table.find tree 4L) 677 + 678 + (* Record serial types from Section 2.1 *) 679 + let test_record_serial_type_sizes () = 680 + let open Btree.Record in 681 + (* Type 0: NULL = 0 bytes *) 682 + let r = encode [ Vnull ] in 683 + let d = decode r in 684 + Alcotest.(check int) "null count" 1 (List.length d); 685 + (* Type 1: 8-bit int *) 686 + let r = encode [ Vint 42L ] in 687 + let d = decode r in 688 + (match d with [ Vint 42L ] -> () | _ -> Alcotest.fail "expected 42"); 689 + (* Type 8: constant 0 *) 690 + let r = encode [ Vint 0L ] in 691 + let d = decode r in 692 + (match d with [ Vint 0L ] -> () | _ -> Alcotest.fail "expected 0"); 693 + (* Type 9: constant 1 *) 694 + let r = encode [ Vint 1L ] in 695 + let d = decode r in 696 + (match d with [ Vint 1L ] -> () | _ -> Alcotest.fail "expected 1"); 697 + (* Type 6: 64-bit int *) 698 + let r = encode [ Vint Int64.max_int ] in 699 + let d = decode r in 700 + (match d with 701 + | [ Vint n ] -> Alcotest.(check int64) "max_int" Int64.max_int n 702 + | _ -> Alcotest.fail "expected max_int"); 703 + (* Negative *) 704 + let r = encode [ Vint (-1L) ] in 705 + let d = decode r in 706 + (match d with 707 + | [ Vint n ] -> Alcotest.(check int64) "neg1" (-1L) n 708 + | _ -> Alcotest.fail "expected -1"); 709 + (* Type N>=12 even: BLOB of (N-12)/2 bytes *) 710 + let blob = "\x00\x01\x02\x03\x04" in 711 + let r = encode [ Vblob blob ] in 712 + let d = decode r in 713 + (match d with 714 + | [ Vblob s ] -> Alcotest.(check string) "blob" blob s 715 + | _ -> Alcotest.fail "expected blob"); 716 + (* Type N>=13 odd: TEXT of (N-13)/2 bytes *) 717 + let text = "hello" in 718 + let r = encode [ Vtext text ] in 719 + let d = decode r in 720 + match d with 721 + | [ Vtext s ] -> Alcotest.(check string) "text" text s 722 + | _ -> Alcotest.fail "expected text" 723 + 724 + (* Test all valid page sizes from spec: 512..65536, powers of 2 *) 725 + let test_all_page_sizes () = 726 + let page_sizes = [ 512; 1024; 2048; 4096; 8192; 16384; 32768; 65536 ] in 727 + List.iter 728 + (fun ps -> 729 + with_temp_file @@ fun file -> 730 + let pager = Btree.Pager.v ~page_size:ps file in 731 + let tree = Btree.Table.v pager in 732 + for i = 1 to 20 do 733 + Btree.Table.insert tree ~rowid:(Int64.of_int i) (Fmt.str "v%d" i) 734 + done; 735 + for i = 1 to 20 do 736 + let r = Btree.Table.find tree (Int64.of_int i) in 737 + Alcotest.(check (option string)) 738 + (Fmt.str "ps=%d find %d" ps i) 739 + (Some (Fmt.str "v%d" i)) 740 + r 741 + done) 742 + page_sizes 743 + 744 + (* Overflow at every valid page size *) 745 + let test_overflow_all_page_sizes () = 746 + let page_sizes = [ 512; 1024; 2048; 4096 ] in 747 + List.iter 748 + (fun ps -> 749 + with_temp_file @@ fun file -> 750 + let pager = Btree.Pager.v ~page_size:ps file in 751 + let tree = Btree.Table.v pager in 752 + (* Insert data larger than max_local for this page size *) 753 + let max_local = ps - 35 in 754 + let overflow_size = max_local + 100 in 755 + for i = 1 to 10 do 756 + let data = String.make overflow_size (Char.chr (65 + (i mod 26))) in 757 + Btree.Table.insert tree ~rowid:(Int64.of_int i) data 758 + done; 759 + for i = 1 to 10 do 760 + let expected = String.make overflow_size (Char.chr (65 + (i mod 26))) in 761 + let r = Btree.Table.find tree (Int64.of_int i) in 762 + Alcotest.(check (option string)) 763 + (Fmt.str "ps=%d overflow find %d" ps i) 764 + (Some expected) r 765 + done) 766 + page_sizes 767 + 768 + (* Varint encoding: 9-byte form for large values *) 769 + let test_varint_9byte () = 770 + (* Values requiring maximum 9 bytes *) 771 + let large = Int64.max_int in 772 + let enc = Btree.Varint.encode large in 773 + Alcotest.(check int) "9-byte varint size" 9 (String.length enc); 774 + let dec, consumed = Btree.Varint.decode enc 0 in 775 + Alcotest.(check int64) "9-byte roundtrip" large dec; 776 + Alcotest.(check int) "consumed 9" 9 consumed; 777 + (* Negative value *) 778 + let neg = Int64.min_int in 779 + let enc = Btree.Varint.encode neg in 780 + let dec, _ = Btree.Varint.decode enc 0 in 781 + Alcotest.(check int64) "min_int roundtrip" neg dec 782 + 783 + (* Stress: many splits with overflow cells *) 784 + let test_stress_overflow_splits () = 785 + with_temp_file @@ fun file -> 786 + let pager = Btree.Pager.v ~page_size:4096 file in 787 + let tree = Btree.Table.v pager in 788 + (* 50 cells of increasing size, some with overflow *) 789 + for i = 1 to 50 do 790 + let size = 500 + (i * 100) in 791 + let data = String.make size (Char.chr (65 + (i mod 26))) in 792 + Btree.Table.insert tree ~rowid:(Int64.of_int i) data 793 + done; 794 + for i = 1 to 50 do 795 + let size = 500 + (i * 100) in 796 + let expected = String.make size (Char.chr (65 + (i mod 26))) in 797 + let r = Btree.Table.find tree (Int64.of_int i) in 798 + Alcotest.(check (option string)) 799 + (Fmt.str "stress find %d (size=%d)" i size) 800 + (Some expected) r 801 + done; 802 + let count = Btree.Table.fold tree ~init:0 ~f:(fun _ _ acc -> acc + 1) in 803 + Alcotest.(check int) "stress count" 50 count 804 + 455 805 let suite = 456 806 ( "btree", 457 807 [ ··· 484 834 Alcotest.test_case "splits near_page_size" `Quick 485 835 test_near_page_size_cells; 486 836 Alcotest.test_case "splits delete_after" `Quick test_delete_after_splits; 837 + Alcotest.test_case "overflow write_read" `Quick test_overflow_write_read; 838 + Alcotest.test_case "overflow with_splits" `Quick test_overflow_with_splits; 839 + Alcotest.test_case "overflow mixed" `Quick test_mixed_overflow_and_normal; 840 + Alcotest.test_case "overflow iter" `Quick test_overflow_iter; 841 + Alcotest.test_case "overflow delete" `Quick test_overflow_delete; 842 + Alcotest.test_case "overflow near_max_local" `Quick 843 + test_near_max_local_cells; 844 + Alcotest.test_case "spec page_type_flags" `Quick test_page_type_flags; 845 + Alcotest.test_case "spec header_sizes" `Quick test_header_sizes; 846 + Alcotest.test_case "spec overflow_thresholds" `Quick 847 + test_overflow_thresholds; 848 + Alcotest.test_case "spec overflow_k" `Quick test_overflow_k_computation; 849 + Alcotest.test_case "spec serial_types" `Quick 850 + test_record_serial_type_sizes; 851 + Alcotest.test_case "spec all_page_sizes" `Quick test_all_page_sizes; 852 + Alcotest.test_case "spec overflow_all_pages" `Quick 853 + test_overflow_all_page_sizes; 854 + Alcotest.test_case "spec varint_9byte" `Quick test_varint_9byte; 855 + Alcotest.test_case "spec stress_overflow" `Quick 856 + test_stress_overflow_splits; 487 857 ] )