Pure OCaml B-tree implementation for persistent storage
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Remove broken ocaml-monitor subtree

+771 -26
+754 -23
lib/btree.ml
··· 180 180 let rowid, consumed = Varint.decode buf (off + 4) in 181 181 ({ left_child; rowid }, 4 + consumed) 182 182 183 - let parse_index_leaf buf off ~usable_size = 183 + (* Parse index leaf cell - returns local payload and overflow pointer *) 184 + let parse_index_leaf_raw buf off ~usable_size = 184 185 let payload_size, consumed = Varint.decode buf off in 185 186 let payload_size = Int64.to_int payload_size in 186 187 let max_local = max_local ~usable_size ~is_table:false in ··· 195 196 let overflow = get_u32_be buf (off + consumed + local) in 196 197 (local, Some overflow) 197 198 in 198 - let payload = String.sub buf (off + consumed) local_size in 199 + let local_payload = String.sub buf (off + consumed) local_size in 199 200 let total = consumed + local_size + if overflow_page = None then 0 else 4 in 200 - ({ payload; overflow_page }, total) 201 + (payload_size, local_payload, overflow_page, total) 201 202 202 - let parse_index_interior buf off ~usable_size = 203 + let parse_index_leaf buf off ~usable_size = 204 + let payload_size, local_payload, overflow_page, total = 205 + parse_index_leaf_raw buf off ~usable_size 206 + in 207 + (* For now, return just the local payload - caller must handle overflow *) 208 + ignore payload_size; 209 + ({ payload = local_payload; overflow_page }, total) 210 + 211 + (* Parse index interior cell - returns local payload and overflow pointer *) 212 + let parse_index_interior_raw buf off ~usable_size = 203 213 let left_child = get_u32_be buf off in 204 214 let payload_size, consumed = Varint.decode buf (off + 4) in 205 215 let payload_size = Int64.to_int payload_size in ··· 215 225 let overflow = get_u32_be buf (off + 4 + consumed + local) in 216 226 (local, Some overflow) 217 227 in 218 - let payload = String.sub buf (off + 4 + consumed) local_size in 228 + let local_payload = String.sub buf (off + 4 + consumed) local_size in 219 229 let total = 220 230 4 + consumed + local_size + if overflow_page = None then 0 else 4 221 231 in 222 - ({ left_child; payload; overflow_page }, total) 232 + (left_child, payload_size, local_payload, overflow_page, total) 233 + 234 + let parse_index_interior buf off ~usable_size = 235 + let left_child, _payload_size, local_payload, overflow_page, total = 236 + parse_index_interior_raw buf off ~usable_size 237 + in 238 + ({ left_child; payload = local_payload; overflow_page }, total) 223 239 end 224 240 225 241 (* Record format *) ··· 946 962 module Index = struct 947 963 type t = { pager : Pager.t; mutable root_page : int } 948 964 949 - let create pager = 950 - let root = Pager.allocate pager in 951 - let page_size = Pager.page_size pager in 965 + (* Initialize a page as empty leaf or interior *) 966 + let init_page ~page_size ~page_type = 952 967 let buf = Bytes.create page_size in 953 - Bytes.set_uint8 buf 0 (byte_of_page_type Leaf_index); 968 + Bytes.set_uint8 buf 0 (byte_of_page_type page_type); 954 969 set_u16_be buf 1 0; 970 + (* first freeblock *) 955 971 set_u16_be buf 3 0; 972 + (* cell count *) 956 973 set_u16_be buf 5 page_size; 974 + (* cell content start *) 957 975 Bytes.set_uint8 buf 7 0; 976 + (* fragmented bytes *) 977 + if is_interior page_type then set_u32_be buf 8 0; 978 + (* right child *) 979 + buf 980 + 981 + let create pager = 982 + let root = Pager.allocate pager in 983 + let page_size = Pager.page_size pager in 984 + let buf = init_page ~page_size ~page_type:Leaf_index in 958 985 Pager.write pager root (Bytes.unsafe_to_string buf); 959 986 { pager; root_page = root } 960 987 ··· 962 989 let root_page t = t.root_page 963 990 let usable_size t = Pager.page_size t.pager 964 991 992 + (* Overflow page handling *) 993 + let read_overflow_chain pager first_page ~remaining_size = 994 + let usable = Pager.page_size pager in 995 + let overflow_content_size = usable - 4 in 996 + let buf = Buffer.create remaining_size in 997 + let rec read page_num remaining = 998 + if remaining <= 0 || page_num = 0 then () 999 + else begin 1000 + let page = Pager.read pager page_num in 1001 + let next_page = get_u32_be page 0 in 1002 + let to_read = min remaining overflow_content_size in 1003 + Buffer.add_substring buf page 4 to_read; 1004 + read next_page (remaining - to_read) 1005 + end 1006 + in 1007 + read first_page remaining_size; 1008 + Buffer.contents buf 1009 + 1010 + let write_overflow_chain pager payload ~offset = 1011 + let usable = Pager.page_size pager in 1012 + let overflow_content_size = usable - 4 in 1013 + let remaining = String.length payload - offset in 1014 + if remaining <= 0 then 0 1015 + else begin 1016 + let first_page = ref 0 in 1017 + let prev_page_buf = ref None in 1018 + let prev_page_num = ref 0 in 1019 + let pos = ref offset in 1020 + while !pos < String.length payload do 1021 + let page_num = Pager.allocate pager in 1022 + if !first_page = 0 then first_page := page_num; 1023 + (* Link previous page to this one *) 1024 + (match !prev_page_buf with 1025 + | Some buf -> 1026 + set_u32_be buf 0 page_num; 1027 + Pager.write pager !prev_page_num (Bytes.unsafe_to_string buf) 1028 + | None -> ()); 1029 + (* Write this page *) 1030 + let page_buf = Bytes.create usable in 1031 + set_u32_be page_buf 0 0; 1032 + (* Next page = 0 for now *) 1033 + let to_write = 1034 + min (String.length payload - !pos) overflow_content_size 1035 + in 1036 + Bytes.blit_string payload !pos page_buf 4 to_write; 1037 + prev_page_buf := Some page_buf; 1038 + prev_page_num := page_num; 1039 + pos := !pos + to_write 1040 + done; 1041 + (* Write final page *) 1042 + (match !prev_page_buf with 1043 + | Some buf -> 1044 + Pager.write pager !prev_page_num (Bytes.unsafe_to_string buf) 1045 + | None -> ()); 1046 + !first_page 1047 + end 1048 + 1049 + (* Read full payload including overflow pages for leaf cells *) 1050 + let read_full_payload t page off ~usable_size = 1051 + let payload_size, local_payload, overflow_page, _consumed = 1052 + Cell.parse_index_leaf_raw page off ~usable_size 1053 + in 1054 + match overflow_page with 1055 + | None -> local_payload 1056 + | Some first_overflow -> 1057 + let remaining = payload_size - String.length local_payload in 1058 + let overflow_data = 1059 + read_overflow_chain t.pager first_overflow ~remaining_size:remaining 1060 + in 1061 + local_payload ^ overflow_data 1062 + 1063 + (* Read full payload including overflow pages for interior cells *) 1064 + let read_full_interior_payload t page off ~usable_size = 1065 + let _left_child, payload_size, local_payload, overflow_page, _consumed = 1066 + Cell.parse_index_interior_raw page off ~usable_size 1067 + in 1068 + match overflow_page with 1069 + | None -> local_payload 1070 + | Some first_overflow -> 1071 + let remaining = payload_size - String.length local_payload in 1072 + let overflow_data = 1073 + read_overflow_chain t.pager first_overflow ~remaining_size:remaining 1074 + in 1075 + local_payload ^ overflow_data 1076 + 965 1077 let cell_pointers page header = 966 1078 let ptrs = Array.make header.cell_count 0 in 967 1079 let ptr_start = page_header_size header.page_type in ··· 970 1082 done; 971 1083 ptrs 972 1084 1085 + (* Calculate free space in a page *) 1086 + let free_space header ~page_type = 1087 + let header_size = page_header_size page_type in 1088 + let ptr_area_end = header_size + (header.cell_count * 2) in 1089 + header.cell_content_start - ptr_area_end - header.fragmented_bytes 1090 + 1091 + (* Encode an index leaf cell - handles overflow for large payloads *) 1092 + let encode_index_leaf_cell_with_overflow t ~payload = 1093 + let payload_size = String.length payload in 1094 + let usable_size = usable_size t in 1095 + let max_local = Cell.max_local ~usable_size ~is_table:false in 1096 + let min_local = Cell.min_local ~usable_size in 1097 + let payload_size_varint = Varint.encode (Int64.of_int payload_size) in 1098 + let varint_len = String.length payload_size_varint in 1099 + if payload_size <= max_local then begin 1100 + (* Fits entirely - no overflow needed *) 1101 + let cell = Bytes.create (varint_len + payload_size) in 1102 + Bytes.blit_string payload_size_varint 0 cell 0 varint_len; 1103 + Bytes.blit_string payload 0 cell varint_len payload_size; 1104 + Bytes.unsafe_to_string cell 1105 + end 1106 + else begin 1107 + (* Need overflow pages *) 1108 + let k = min_local + ((payload_size - min_local) mod (usable_size - 4)) in 1109 + let local_size = if k <= max_local then k else min_local in 1110 + (* Write overflow pages for data beyond local_size *) 1111 + let overflow_page = 1112 + write_overflow_chain t.pager payload ~offset:local_size 1113 + in 1114 + (* Create cell: varint(size) + local_payload + overflow_ptr *) 1115 + let cell = Bytes.create (varint_len + local_size + 4) in 1116 + Bytes.blit_string payload_size_varint 0 cell 0 varint_len; 1117 + Bytes.blit_string payload 0 cell varint_len local_size; 1118 + set_u32_be cell (varint_len + local_size) overflow_page; 1119 + Bytes.unsafe_to_string cell 1120 + end 1121 + 1122 + (* Encode an index interior cell *) 1123 + let encode_index_interior_cell ~left_child ~payload = 1124 + let payload_size_varint = 1125 + Varint.encode (Int64.of_int (String.length payload)) 1126 + in 1127 + let cell = 1128 + Bytes.create 1129 + (4 + String.length payload_size_varint + String.length payload) 1130 + in 1131 + set_u32_be cell 0 left_child; 1132 + Bytes.blit_string payload_size_varint 0 cell 4 1133 + (String.length payload_size_varint); 1134 + Bytes.blit_string payload 0 cell 1135 + (4 + String.length payload_size_varint) 1136 + (String.length payload); 1137 + Bytes.unsafe_to_string cell 1138 + 1139 + (* Write a cell into a page buffer, returns new cell_content_start *) 1140 + let write_cell buf ~cell_content_start ~cell = 1141 + let cell_len = String.length cell in 1142 + let new_start = cell_content_start - cell_len in 1143 + Bytes.blit_string cell 0 buf new_start cell_len; 1144 + new_start 1145 + 1146 + (* Insert a cell pointer at index, shifting others *) 1147 + let insert_cell_pointer buf ~header_offset ~page_type ~cell_count ~index ~ptr 1148 + = 1149 + let ptr_start = header_offset + page_header_size page_type in 1150 + (* Shift existing pointers right *) 1151 + for i = cell_count - 1 downto index do 1152 + let old_ptr = 1153 + get_u16_be (Bytes.unsafe_to_string buf) (ptr_start + (i * 2)) 1154 + in 1155 + set_u16_be buf (ptr_start + ((i + 1) * 2)) old_ptr 1156 + done; 1157 + set_u16_be buf (ptr_start + (index * 2)) ptr 1158 + 1159 + (* Compare for btree navigation: key is strictly less than payload. 1160 + Special case: if key is a prefix of payload, they are equal for navigation purposes 1161 + (i.e., entries starting with key might be at or after payload's position). *) 1162 + let key_less_than key payload = 1163 + let key_len = String.length key in 1164 + let payload_len = String.length payload in 1165 + let cmp_len = min key_len payload_len in 1166 + let cmp = 1167 + String.compare (String.sub key 0 cmp_len) (String.sub payload 0 cmp_len) 1168 + in 1169 + if cmp <> 0 then cmp < 0 1170 + else 1171 + (* Prefixes match - if key is shorter or equal length, it's NOT strictly less *) 1172 + false 1173 + 973 1174 let rec mem_in_page t page_num key = 974 1175 let page = Pager.read t.pager page_num in 975 1176 let header = parse_page_header page 0 in ··· 977 1178 let usable = usable_size t in 978 1179 match header.page_type with 979 1180 | Leaf_index -> 980 - let rec search i = 981 - if i >= header.cell_count then false 1181 + let rec search lo hi = 1182 + if lo > hi then false 982 1183 else 1184 + let mid = (lo + hi) / 2 in 983 1185 let cell, _ = 984 - Cell.parse_index_leaf page ptrs.(i) ~usable_size:usable 1186 + Cell.parse_index_leaf page ptrs.(mid) ~usable_size:usable 985 1187 in 986 - if cell.payload = key then true 987 - else if cell.payload > key then false 988 - else search (i + 1) 1188 + let cmp = String.compare key cell.payload in 1189 + if cmp = 0 then true 1190 + else if cmp < 0 then search lo (mid - 1) 1191 + else search (mid + 1) hi 989 1192 in 990 - search 0 1193 + search 0 (header.cell_count - 1) 991 1194 | Interior_index -> 992 1195 let rec find_child i = 993 1196 if i >= header.cell_count then Option.get header.right_child ··· 995 1198 let cell, _ = 996 1199 Cell.parse_index_interior page ptrs.(i) ~usable_size:usable 997 1200 in 998 - if key <= cell.payload then cell.left_child else find_child (i + 1) 1201 + if key_less_than key cell.payload then cell.left_child 1202 + else find_child (i + 1) 999 1203 in 1000 1204 mem_in_page t (find_child 0) key 1001 1205 | _ -> failwith "Invalid page type in index B-tree" 1002 1206 1003 1207 let mem t key = mem_in_page t t.root_page key 1004 - let insert _t _key = failwith "Index insert not yet implemented" 1005 - let delete _t _key = failwith "Index delete not yet implemented" 1208 + 1209 + (* Find exact key, returns payload *) 1210 + let rec find_in_page t page_num key = 1211 + let page = Pager.read t.pager page_num in 1212 + let header = parse_page_header page 0 in 1213 + let ptrs = cell_pointers page header in 1214 + let usable = usable_size t in 1215 + match header.page_type with 1216 + | Leaf_index -> 1217 + let rec search lo hi = 1218 + if lo > hi then None 1219 + else 1220 + let mid = (lo + hi) / 2 in 1221 + (* Read full payload including overflow *) 1222 + let full_payload = 1223 + read_full_payload t page ptrs.(mid) ~usable_size:usable 1224 + in 1225 + let cmp = String.compare key full_payload in 1226 + if cmp = 0 then Some full_payload 1227 + else if cmp < 0 then search lo (mid - 1) 1228 + else search (mid + 1) hi 1229 + in 1230 + search 0 (header.cell_count - 1) 1231 + | Interior_index -> 1232 + let rec find_child_rec i = 1233 + if i >= header.cell_count then Option.get header.right_child 1234 + else 1235 + (* Read full payload for interior comparison *) 1236 + let full_payload = 1237 + read_full_interior_payload t page ptrs.(i) ~usable_size:usable 1238 + in 1239 + if key_less_than key full_payload then 1240 + let cell, _ = 1241 + Cell.parse_index_interior page ptrs.(i) ~usable_size:usable 1242 + in 1243 + cell.left_child 1244 + else find_child_rec (i + 1) 1245 + in 1246 + find_in_page t (find_child_rec 0) key 1247 + | _ -> failwith "Invalid page type in index B-tree" 1248 + 1249 + let find t key = find_in_page t t.root_page key 1250 + 1251 + (* Find a key in leaf page, returns (payload, index) if found *) 1252 + let find_in_leaf t page header key = 1253 + let ptrs = cell_pointers page header in 1254 + let usable = usable_size t in 1255 + let rec search lo hi = 1256 + if lo > hi then None 1257 + else 1258 + let mid = (lo + hi) / 2 in 1259 + let full_payload = 1260 + read_full_payload t page ptrs.(mid) ~usable_size:usable 1261 + in 1262 + let cmp = String.compare key full_payload in 1263 + if cmp = 0 then Some (full_payload, mid) 1264 + else if cmp < 0 then search lo (mid - 1) 1265 + else search (mid + 1) hi 1266 + in 1267 + search 0 (header.cell_count - 1) 1268 + 1269 + (* Find insertion index for key in leaf page *) 1270 + let find_insert_idx t page header key = 1271 + let ptrs = cell_pointers page header in 1272 + let usable = usable_size t in 1273 + let rec find i = 1274 + if i >= header.cell_count then i 1275 + else 1276 + let full_payload = 1277 + read_full_payload t page ptrs.(i) ~usable_size:usable 1278 + in 1279 + if key < full_payload then i else find (i + 1) 1280 + in 1281 + find 0 1282 + 1283 + (* Find child page for key in interior page *) 1284 + let find_child t page header key = 1285 + let ptrs = cell_pointers page header in 1286 + let usable = usable_size t in 1287 + let rec loop i = 1288 + if i >= header.cell_count then Option.get header.right_child 1289 + else 1290 + let full_payload = 1291 + read_full_interior_payload t page ptrs.(i) ~usable_size:usable 1292 + in 1293 + if key_less_than key full_payload then 1294 + let cell, _ = 1295 + Cell.parse_index_interior page ptrs.(i) ~usable_size:usable 1296 + in 1297 + cell.left_child 1298 + else loop (i + 1) 1299 + in 1300 + loop 0 1301 + 1302 + (* Find child index for key in interior page *) 1303 + let find_child_idx t page header key = 1304 + let ptrs = cell_pointers page header in 1305 + let usable = usable_size t in 1306 + let rec loop i = 1307 + if i >= header.cell_count then i (* right child *) 1308 + else 1309 + let full_payload = 1310 + read_full_interior_payload t page ptrs.(i) ~usable_size:usable 1311 + in 1312 + if key_less_than key full_payload then i else loop (i + 1) 1313 + in 1314 + loop 0 1315 + 1316 + (* Split result: new page number and separator key *) 1317 + type split_result = { new_page : int; separator_key : string } 1318 + 1319 + (* Split a leaf page, returns info about the new page *) 1320 + let split_leaf t page_num = 1321 + let page = Pager.read t.pager page_num in 1322 + let header = parse_page_header page 0 in 1323 + let ptrs = cell_pointers page header in 1324 + let usable = usable_size t in 1325 + let page_size = Pager.page_size t.pager in 1326 + 1327 + (* Find split point (middle) *) 1328 + let split_idx = header.cell_count / 2 in 1329 + 1330 + (* Get separator key (first key that goes to right page) - read full payload *) 1331 + let separator_key = 1332 + read_full_payload t page ptrs.(split_idx) ~usable_size:usable 1333 + in 1334 + 1335 + (* Create new right page *) 1336 + let new_page_num = Pager.allocate t.pager in 1337 + let new_buf = init_page ~page_size ~page_type:Leaf_index in 1338 + 1339 + (* Copy cells [split_idx..cell_count-1] to new page *) 1340 + let new_cell_content_start = ref page_size in 1341 + for i = split_idx to header.cell_count - 1 do 1342 + let cell_off = ptrs.(i) in 1343 + (* Read full payload including from overflow pages *) 1344 + let full_payload = 1345 + read_full_payload t page cell_off ~usable_size:usable 1346 + in 1347 + (* Re-encode with overflow support *) 1348 + let cell_data = 1349 + encode_index_leaf_cell_with_overflow t ~payload:full_payload 1350 + in 1351 + new_cell_content_start := 1352 + write_cell new_buf ~cell_content_start:!new_cell_content_start 1353 + ~cell:cell_data; 1354 + let new_idx = i - split_idx in 1355 + let ptr_off = page_header_size Leaf_index + (new_idx * 2) in 1356 + set_u16_be new_buf ptr_off !new_cell_content_start 1357 + done; 1358 + let new_cell_count = header.cell_count - split_idx in 1359 + set_u16_be new_buf 3 new_cell_count; 1360 + set_u16_be new_buf 5 !new_cell_content_start; 1361 + Pager.write t.pager new_page_num (Bytes.unsafe_to_string new_buf); 1362 + 1363 + (* Update original page to only have cells [0..split_idx-1] *) 1364 + let old_buf = Bytes.of_string page in 1365 + set_u16_be old_buf 3 split_idx; 1366 + (* Recalculate cell content start for remaining cells *) 1367 + if split_idx > 0 then begin 1368 + let min_ptr = ref page_size in 1369 + for i = 0 to split_idx - 1 do 1370 + if ptrs.(i) < !min_ptr then min_ptr := ptrs.(i) 1371 + done; 1372 + set_u16_be old_buf 5 !min_ptr 1373 + end 1374 + else set_u16_be old_buf 5 page_size; 1375 + Pager.write t.pager page_num (Bytes.unsafe_to_string old_buf); 1376 + 1377 + { new_page = new_page_num; separator_key } 1378 + 1379 + (* Split an interior page *) 1380 + let split_interior t page_num = 1381 + let page = Pager.read t.pager page_num in 1382 + let header = parse_page_header page 0 in 1383 + let ptrs = cell_pointers page header in 1384 + let usable = usable_size t in 1385 + let page_size = Pager.page_size t.pager in 1386 + 1387 + (* Split point - middle cell becomes separator, doesn't go to either page *) 1388 + let split_idx = header.cell_count / 2 in 1389 + let sep_cell, _ = 1390 + Cell.parse_index_interior page ptrs.(split_idx) ~usable_size:usable 1391 + in 1392 + let separator_key = sep_cell.payload in 1393 + 1394 + (* Create new right page *) 1395 + let new_page_num = Pager.allocate t.pager in 1396 + let new_buf = init_page ~page_size ~page_type:Interior_index in 1397 + 1398 + (* Cells [split_idx+1..cell_count-1] go to new page *) 1399 + let new_cell_content_start = ref page_size in 1400 + for i = split_idx + 1 to header.cell_count - 1 do 1401 + let cell_off = ptrs.(i) in 1402 + let cell, _ = 1403 + Cell.parse_index_interior page cell_off ~usable_size:usable 1404 + in 1405 + let cell_data = 1406 + encode_index_interior_cell ~left_child:cell.left_child 1407 + ~payload:cell.payload 1408 + in 1409 + new_cell_content_start := 1410 + write_cell new_buf ~cell_content_start:!new_cell_content_start 1411 + ~cell:cell_data; 1412 + let new_idx = i - split_idx - 1 in 1413 + let ptr_off = page_header_size Interior_index + (new_idx * 2) in 1414 + set_u16_be new_buf ptr_off !new_cell_content_start 1415 + done; 1416 + let new_cell_count = header.cell_count - split_idx - 1 in 1417 + set_u16_be new_buf 3 new_cell_count; 1418 + set_u16_be new_buf 5 !new_cell_content_start; 1419 + (* Right child of new page is same as original *) 1420 + set_u32_be new_buf 8 (Option.get header.right_child); 1421 + Pager.write t.pager new_page_num (Bytes.unsafe_to_string new_buf); 1422 + 1423 + (* Update original page: keep cells [0..split_idx-1], right child = sep_cell.left_child *) 1424 + let old_buf = Bytes.of_string page in 1425 + set_u16_be old_buf 3 split_idx; 1426 + set_u32_be old_buf 8 sep_cell.left_child; 1427 + if split_idx > 0 then begin 1428 + let min_ptr = ref page_size in 1429 + for i = 0 to split_idx - 1 do 1430 + if ptrs.(i) < !min_ptr then min_ptr := ptrs.(i) 1431 + done; 1432 + set_u16_be old_buf 5 !min_ptr 1433 + end 1434 + else set_u16_be old_buf 5 page_size; 1435 + Pager.write t.pager page_num (Bytes.unsafe_to_string old_buf); 1436 + 1437 + { new_page = new_page_num; separator_key } 1438 + 1439 + (* Insert a separator into an interior page, potentially splitting *) 1440 + let rec insert_into_interior t page_num ~left_child ~separator_key 1441 + ~right_child = 1442 + let page = Pager.read t.pager page_num in 1443 + let header = parse_page_header page 0 in 1444 + let cell = encode_index_interior_cell ~left_child ~payload:separator_key in 1445 + let cell_len = String.length cell in 1446 + let space_needed = cell_len + 2 in 1447 + (* cell + pointer *) 1448 + 1449 + if free_space header ~page_type:Interior_index >= space_needed then begin 1450 + (* Fits - insert directly *) 1451 + let buf = Bytes.of_string page in 1452 + let ptrs = cell_pointers page header in 1453 + let usable = usable_size t in 1454 + 1455 + (* Find insert position *) 1456 + let insert_idx = 1457 + let rec find i = 1458 + if i >= header.cell_count then i 1459 + else 1460 + let c, _ = 1461 + Cell.parse_index_interior page ptrs.(i) ~usable_size:usable 1462 + in 1463 + if separator_key < c.payload then i else find (i + 1) 1464 + in 1465 + find 0 1466 + in 1467 + 1468 + (* Write cell *) 1469 + let cell_start = 1470 + write_cell buf ~cell_content_start:header.cell_content_start ~cell 1471 + in 1472 + set_u16_be buf 5 cell_start; 1473 + 1474 + (* Insert pointer *) 1475 + insert_cell_pointer buf ~header_offset:0 ~page_type:Interior_index 1476 + ~cell_count:header.cell_count ~index:insert_idx ~ptr:cell_start; 1477 + 1478 + (* Update cell count *) 1479 + set_u16_be buf 3 (header.cell_count + 1); 1480 + 1481 + (* Update child pointers *) 1482 + if insert_idx < header.cell_count then begin 1483 + let ptr_start = page_header_size Interior_index in 1484 + let displaced_ptr = 1485 + get_u16_be 1486 + (Bytes.unsafe_to_string buf) 1487 + (ptr_start + ((insert_idx + 1) * 2)) 1488 + in 1489 + set_u32_be buf displaced_ptr right_child 1490 + end 1491 + else set_u32_be buf 8 right_child; 1492 + 1493 + Pager.write t.pager page_num (Bytes.unsafe_to_string buf); 1494 + None 1495 + end 1496 + else begin 1497 + (* Need to split interior page *) 1498 + let split = split_interior t page_num in 1499 + 1500 + (* Determine which page gets the new separator *) 1501 + if separator_key < split.separator_key then begin 1502 + ignore 1503 + (insert_into_interior t page_num ~left_child ~separator_key 1504 + ~right_child) 1505 + end 1506 + else begin 1507 + ignore 1508 + (insert_into_interior t split.new_page ~left_child ~separator_key 1509 + ~right_child) 1510 + end; 1511 + 1512 + Some { split with separator_key = split.separator_key } 1513 + end 1514 + 1515 + (* Insert into a leaf page, potentially splitting *) 1516 + let rec insert_into_leaf t page_num ~key ~parent_stack = 1517 + let page = Pager.read t.pager page_num in 1518 + let header = parse_page_header page 0 in 1519 + let cell = encode_index_leaf_cell_with_overflow t ~payload:key in 1520 + let cell_len = String.length cell in 1521 + let space_needed = cell_len + 2 in 1522 + (* cell + pointer *) 1523 + 1524 + if free_space header ~page_type:Leaf_index >= space_needed then begin 1525 + (* Fits - insert directly *) 1526 + let buf = Bytes.of_string page in 1527 + let insert_idx = find_insert_idx t page header key in 1528 + 1529 + (* Write cell *) 1530 + let cell_start = 1531 + write_cell buf ~cell_content_start:header.cell_content_start ~cell 1532 + in 1533 + set_u16_be buf 5 cell_start; 1534 + 1535 + (* Insert pointer *) 1536 + insert_cell_pointer buf ~header_offset:0 ~page_type:Leaf_index 1537 + ~cell_count:header.cell_count ~index:insert_idx ~ptr:cell_start; 1538 + 1539 + (* Update cell count *) 1540 + set_u16_be buf 3 (header.cell_count + 1); 1541 + 1542 + Pager.write t.pager page_num (Bytes.unsafe_to_string buf) 1543 + end 1544 + else begin 1545 + (* Need to split *) 1546 + let split = split_leaf t page_num in 1547 + 1548 + (* Determine target page and insert *) 1549 + let target_page = 1550 + if key < split.separator_key then page_num else split.new_page 1551 + in 1552 + let target = Pager.read t.pager target_page in 1553 + let target_header = parse_page_header target 0 in 1554 + let target_buf = Bytes.of_string target in 1555 + let insert_idx = find_insert_idx t target target_header key in 1556 + let cell_start = 1557 + write_cell target_buf 1558 + ~cell_content_start:target_header.cell_content_start ~cell 1559 + in 1560 + set_u16_be target_buf 5 cell_start; 1561 + insert_cell_pointer target_buf ~header_offset:0 ~page_type:Leaf_index 1562 + ~cell_count:target_header.cell_count ~index:insert_idx ~ptr:cell_start; 1563 + set_u16_be target_buf 3 (target_header.cell_count + 1); 1564 + Pager.write t.pager target_page (Bytes.unsafe_to_string target_buf); 1565 + 1566 + (* Propagate split up *) 1567 + propagate_split t ~parent_stack ~left_page:page_num 1568 + ~separator_key:split.separator_key ~right_page:split.new_page 1569 + end 1570 + 1571 + and propagate_split t ~parent_stack ~left_page ~separator_key ~right_page = 1572 + match parent_stack with 1573 + | [] -> 1574 + (* Splitting root - create new root *) 1575 + let page_size = Pager.page_size t.pager in 1576 + let new_root = Pager.allocate t.pager in 1577 + let buf = init_page ~page_size ~page_type:Interior_index in 1578 + 1579 + (* Single cell pointing to left page *) 1580 + let cell = 1581 + encode_index_interior_cell ~left_child:left_page 1582 + ~payload:separator_key 1583 + in 1584 + let cell_start = write_cell buf ~cell_content_start:page_size ~cell in 1585 + set_u16_be buf 5 cell_start; 1586 + set_u16_be buf (page_header_size Interior_index) cell_start; 1587 + set_u16_be buf 3 1; 1588 + set_u32_be buf 8 right_page; 1589 + 1590 + Pager.write t.pager new_root (Bytes.unsafe_to_string buf); 1591 + t.root_page <- new_root 1592 + | parent_page :: rest -> ( 1593 + match 1594 + insert_into_interior t parent_page ~left_child:left_page 1595 + ~separator_key ~right_child:right_page 1596 + with 1597 + | None -> () (* Fit in parent *) 1598 + | Some split -> 1599 + (* Parent also split, propagate up *) 1600 + propagate_split t ~parent_stack:rest ~left_page:parent_page 1601 + ~separator_key:split.separator_key ~right_page:split.new_page) 1602 + 1603 + (* Main insert - traverses tree and handles splits *) 1604 + let insert t key = 1605 + let rec traverse page_num parent_stack = 1606 + let page = Pager.read t.pager page_num in 1607 + let header = parse_page_header page 0 in 1608 + match header.page_type with 1609 + | Leaf_index -> ( 1610 + (* Check if key already exists *) 1611 + match find_in_leaf t page header key with 1612 + | Some _ -> () (* Key exists, do nothing (set semantics) *) 1613 + | None -> insert_into_leaf t page_num ~key ~parent_stack) 1614 + | Interior_index -> 1615 + let child_idx = find_child_idx t page header key in 1616 + let child_page = 1617 + if child_idx >= header.cell_count then Option.get header.right_child 1618 + else 1619 + let ptrs = cell_pointers page header in 1620 + let usable = usable_size t in 1621 + let cell, _ = 1622 + Cell.parse_index_interior page ptrs.(child_idx) 1623 + ~usable_size:usable 1624 + in 1625 + cell.left_child 1626 + in 1627 + traverse child_page (page_num :: parent_stack) 1628 + | _ -> failwith "Invalid page type in index B-tree" 1629 + in 1630 + traverse t.root_page [] 1631 + 1632 + (* Delete a cell from a leaf page at given index *) 1633 + let delete_from_leaf t page_num ~index = 1634 + let page = Pager.read t.pager page_num in 1635 + let header = parse_page_header page 0 in 1636 + if header.cell_count = 0 then () 1637 + else begin 1638 + let buf = Bytes.of_string page in 1639 + let ptr_start = page_header_size Leaf_index in 1640 + 1641 + (* Shift pointers left to remove the entry at index *) 1642 + for i = index to header.cell_count - 2 do 1643 + let next_ptr = get_u16_be page (ptr_start + ((i + 1) * 2)) in 1644 + set_u16_be buf (ptr_start + (i * 2)) next_ptr 1645 + done; 1646 + 1647 + (* Decrease cell count *) 1648 + set_u16_be buf 3 (header.cell_count - 1); 1649 + 1650 + (* Note: We don't reclaim space - it becomes fragmented. 1651 + A proper implementation would compact or track free space. *) 1652 + Pager.write t.pager page_num (Bytes.unsafe_to_string buf) 1653 + end 1654 + 1655 + (* Delete implementation - simplified, doesn't rebalance *) 1656 + let delete t key = 1657 + let rec traverse page_num = 1658 + let page = Pager.read t.pager page_num in 1659 + let header = parse_page_header page 0 in 1660 + match header.page_type with 1661 + | Leaf_index -> ( 1662 + match find_in_leaf t page header key with 1663 + | Some (_, idx) -> delete_from_leaf t page_num ~index:idx 1664 + | None -> () (* Key not found, nothing to do *)) 1665 + | Interior_index -> 1666 + let child = find_child t page header key in 1667 + traverse child 1668 + | _ -> failwith "Invalid page type in index B-tree" 1669 + in 1670 + traverse t.root_page 1671 + 1672 + (* Find by prefix - returns first entry starting with prefix *) 1673 + let rec find_by_prefix_in_page t page_num prefix = 1674 + let page = Pager.read t.pager page_num in 1675 + let header = parse_page_header page 0 in 1676 + let ptrs = cell_pointers page header in 1677 + let usable = usable_size t in 1678 + let prefix_len = String.length prefix in 1679 + let starts_with payload = 1680 + String.length payload >= prefix_len 1681 + && String.sub payload 0 prefix_len = prefix 1682 + in 1683 + match header.page_type with 1684 + | Leaf_index -> 1685 + (* Linear search for first entry with prefix *) 1686 + let rec find_first i = 1687 + if i >= header.cell_count then None 1688 + else 1689 + let full_payload = 1690 + read_full_payload t page ptrs.(i) ~usable_size:usable 1691 + in 1692 + if starts_with full_payload then Some full_payload 1693 + else if full_payload > prefix then None 1694 + else find_first (i + 1) 1695 + in 1696 + find_first 0 1697 + | Interior_index -> 1698 + let child = find_child t page header prefix in 1699 + find_by_prefix_in_page t child prefix 1700 + | _ -> failwith "Invalid page type in index B-tree" 1701 + 1702 + let find_by_prefix t prefix = find_by_prefix_in_page t t.root_page prefix 1703 + 1704 + (* Delete by prefix - deletes first entry starting with prefix *) 1705 + let rec delete_by_prefix_in_page t page_num prefix = 1706 + let page = Pager.read t.pager page_num in 1707 + let header = parse_page_header page 0 in 1708 + let ptrs = cell_pointers page header in 1709 + let usable = usable_size t in 1710 + let prefix_len = String.length prefix in 1711 + let starts_with payload = 1712 + String.length payload >= prefix_len 1713 + && String.sub payload 0 prefix_len = prefix 1714 + in 1715 + match header.page_type with 1716 + | Leaf_index -> ( 1717 + (* Find first entry with prefix *) 1718 + let rec find_idx i = 1719 + if i >= header.cell_count then None 1720 + else 1721 + let full_payload = 1722 + read_full_payload t page ptrs.(i) ~usable_size:usable 1723 + in 1724 + if starts_with full_payload then Some i 1725 + else if full_payload > prefix then None 1726 + else find_idx (i + 1) 1727 + in 1728 + match find_idx 0 with 1729 + | Some idx -> delete_from_leaf t page_num ~index:idx 1730 + | None -> ()) 1731 + | Interior_index -> 1732 + let child = find_child t page header prefix in 1733 + delete_by_prefix_in_page t child prefix 1734 + | _ -> failwith "Invalid page type in index B-tree" 1735 + 1736 + let delete_by_prefix t prefix = delete_by_prefix_in_page t t.root_page prefix 1006 1737 1007 1738 let iter t f = 1008 1739 let rec iter_page page_num = ··· 1013 1744 match header.page_type with 1014 1745 | Leaf_index -> 1015 1746 for i = 0 to header.cell_count - 1 do 1016 - let cell, _ = 1017 - Cell.parse_index_leaf page ptrs.(i) ~usable_size:usable 1747 + let full_payload = 1748 + read_full_payload t page ptrs.(i) ~usable_size:usable 1018 1749 in 1019 - f cell.payload 1750 + f full_payload 1020 1751 done 1021 1752 | Interior_index -> 1022 1753 for i = 0 to header.cell_count - 1 do
+17 -3
lib/btree.mli
··· 196 196 197 197 module Index : sig 198 198 type t 199 + (** An index B-tree for string keys. *) 199 200 200 201 val create : Pager.t -> t 201 202 (** [create pager] creates a new empty index B-tree. *) ··· 209 210 val mem : t -> string -> bool 210 211 (** [mem t key] returns true if [key] exists in the index. *) 211 212 213 + val find : t -> string -> string option 214 + (** [find t key] returns the payload for [key] if it exists. *) 215 + 212 216 val insert : t -> string -> unit 213 - (** [insert t key] inserts a key. *) 217 + (** [insert t key] inserts a key. If the key already exists, this is a no-op 218 + (set semantics). *) 214 219 215 220 val delete : t -> string -> unit 216 - (** [delete t key] deletes a key. *) 221 + (** [delete t key] removes a key. If the key doesn't exist, this is a no-op. 222 + Note: This is a simplified implementation that doesn't rebalance the tree. 223 + *) 224 + 225 + val find_by_prefix : t -> string -> string option 226 + (** [find_by_prefix t prefix] finds the first entry starting with [prefix]. *) 227 + 228 + val delete_by_prefix : t -> string -> unit 229 + (** [delete_by_prefix t prefix] deletes the first entry starting with 230 + [prefix]. *) 217 231 218 232 val iter : t -> (string -> unit) -> unit 219 - (** [iter t f] calls [f key] for each key in order. *) 233 + (** [iter t f] calls [f key] for each key in sorted order. *) 220 234 end