Pure OCaml B-tree implementation for persistent storage
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix(btree): handle large cells that don't fit after leaf split

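After splitting a leaf, first propagate the split to the parent so the
tree structure is consistent, then re-insert the cell via a full tree
traversal instead of writing directly to the target page. The target page
may still lack room when a cell is larger than half the page size;
re-traversing the tree handles the resulting cascading splits.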

Add tests for large cells (~50% of a page), mixed cell sizes,
near-page-size cells, and delete-after-split scenarios. All 41 btree
tests pass in <1s.

+79 -17
+6 -17
lib/table.ml
··· 315 315 (* Need to split *) 316 316 let split = split_leaf t page_num in 317 317 318 - (* Determine target page and insert directly (no recursion needed - 319 - after split, both pages have ~half capacity, plenty of room) *) 320 - let target_page = 321 - if rowid < split.separator_rowid then page_num else split.new_page 322 - in 323 - let target = Pager.read t.pager target_page in 324 - let target_header = Page.parse_header target 0 in 325 - let target_buf = Bytes.of_string target in 326 - let insert_idx = insert_idx t target target_header rowid in 327 - ignore 328 - (write_and_insert_cell target_buf ~header:target_header 329 - ~kind:Page.Leaf_table ~index:insert_idx ~cell); 330 - Pager.write t.pager target_page (Bytes.unsafe_to_string target_buf); 318 + (* Propagate split up first so tree structure is consistent *) 319 + propagate_split t ~parent_stack ~left_page:page_num 320 + ~separator_rowid:split.separator_rowid ~right_page:split.new_page; 331 321 332 - (* Propagate split up *) 333 - propagate_split t ~parent_stack ~left_page:page_num 334 - ~separator_rowid:split.separator_rowid ~right_page:split.new_page 322 + (* Now insert via full tree traversal (handles cascading splits) *) 323 + insert t ~rowid data 335 324 end 336 325 337 326 and propagate_split t ~parent_stack ~left_page ~separator_rowid ~right_page = ··· 366 355 ~separator_rowid:split.separator_rowid ~right_page:split.new_page) 367 356 368 357 (* Main insert - traverses tree and handles splits *) 369 - let insert t ~rowid data = 358 + and insert t ~rowid data = 370 359 let rec traverse page_num parent_stack = 371 360 let page = Pager.read t.pager page_num in 372 361 let header = Page.parse_header page 0 in
+73
test/test_btree.ml
··· 384 384 prev := rowid); 385 385 Alcotest.(check int64) "last rowid" (Int64.of_int n) !prev 386 386 387 + let test_large_cells_split () = 388 + (* Cells larger than half the page — the exact bug case. 389 + With 512-byte pages, cells of ~300 bytes will cause splits where 390 + the target page may not have room after a naive count-based split. *) 391 + with_temp_file @@ fun file -> 392 + let pager = Btree.Pager.v ~page_size:512 file in 393 + let tree = Btree.Table.v pager in 394 + for i = 1 to 20 do 395 + let data = String.make 200 (Char.chr (65 + (i mod 26))) in 396 + Btree.Table.insert tree ~rowid:(Int64.of_int i) data 397 + done; 398 + for i = 1 to 20 do 399 + let expected = String.make 200 (Char.chr (65 + (i mod 26))) in 400 + let r = Btree.Table.find tree (Int64.of_int i) in 401 + Alcotest.(check (option string)) (Fmt.str "find %d" i) (Some expected) r 402 + done 403 + 404 + let test_mixed_cell_sizes () = 405 + (* Mix of tiny and large cells to stress uneven splits *) 406 + with_temp_file @@ fun file -> 407 + let pager = Btree.Pager.v ~page_size:512 file in 408 + let tree = Btree.Table.v pager in 409 + for i = 1 to 50 do 410 + let size = if i mod 3 = 0 then 200 else 5 in 411 + let data = String.make size 'x' in 412 + Btree.Table.insert tree ~rowid:(Int64.of_int i) data 413 + done; 414 + for i = 1 to 50 do 415 + let size = if i mod 3 = 0 then 200 else 5 in 416 + let expected = String.make size 'x' in 417 + let r = Btree.Table.find tree (Int64.of_int i) in 418 + Alcotest.(check (option string)) (Fmt.str "find %d" i) (Some expected) r 419 + done 420 + 421 + let test_near_page_size_cells () = 422 + (* Cells approaching page size limit — each cell uses most of the page *) 423 + with_temp_file @@ fun file -> 424 + let pager = Btree.Pager.v ~page_size:4096 file in 425 + let tree = Btree.Table.v pager in 426 + for i = 1 to 10 do 427 + let data = String.make 2000 (Char.chr (65 + (i mod 26))) in 428 + Btree.Table.insert tree ~rowid:(Int64.of_int i) data 429 + done; 
430 + for i = 1 to 10 do 431 + let expected = String.make 2000 (Char.chr (65 + (i mod 26))) in 432 + let r = Btree.Table.find tree (Int64.of_int i) in 433 + Alcotest.(check (option string)) (Fmt.str "find %d" i) (Some expected) r 434 + done; 435 + let count = Btree.Table.fold tree ~init:0 ~f:(fun _ _ acc -> acc + 1) in 436 + Alcotest.(check int) "total count" 10 count 437 + 438 + let test_delete_after_splits () = 439 + (* Insert enough to cause splits, then delete and verify *) 440 + with_temp_file @@ fun file -> 441 + let pager = Btree.Pager.v ~page_size:512 file in 442 + let tree = Btree.Table.v pager in 443 + for i = 1 to 100 do 444 + Btree.Table.insert tree ~rowid:(Int64.of_int i) (Fmt.str "v%d" i) 445 + done; 446 + for i = 1 to 50 do 447 + Btree.Table.delete tree (Int64.of_int (i * 2)) 448 + done; 449 + for i = 1 to 100 do 450 + let expected = if i mod 2 = 0 then None else Some (Fmt.str "v%d" i) in 451 + let r = Btree.Table.find tree (Int64.of_int i) in 452 + Alcotest.(check (option string)) (Fmt.str "find %d" i) expected r 453 + done 454 + 387 455 let suite = 388 456 ( "btree", 389 457 [ ··· 411 479 Alcotest.test_case "splits random_order" `Quick test_random_insert_order; 412 480 Alcotest.test_case "splits large_values" `Quick test_large_values; 413 481 Alcotest.test_case "splits iter_order" `Quick test_iter_order_after_splits; 482 + Alcotest.test_case "splits large_cells" `Quick test_large_cells_split; 483 + Alcotest.test_case "splits mixed_sizes" `Quick test_mixed_cell_sizes; 484 + Alcotest.test_case "splits near_page_size" `Quick 485 + test_near_page_size_cells; 486 + Alcotest.test_case "splits delete_after" `Quick test_delete_after_splits; 414 487 ] )