objective categorical abstract machine language personal data server
65
fork

Configure Feed

Select the types of activity you want to include in your feed.

Implement MST covering proof

futurGH e1cce0f6 57bea8a7

+197 -97
+197 -97
mist/lib/mst.ml
··· 1 + open Storage 1 2 module StringMap = Dag_cbor.StringMap 2 3 3 4 type node_raw = ··· 37 38 38 39 and entry = 39 40 {layer: int; key: string; value: Cid.t; right: node option Lwt.t Lazy.t} 41 + 42 + type node_or_entry = Node of node | Entry of entry 40 43 41 44 let ( let*? ) lazy_opt_lwt f = 42 45 let%lwt result = Lazy.force lazy_opt_lwt in ··· 85 88 node.entries <- node.entries @ [entry] ; 86 89 Lwt.return node ) 87 90 88 - (* helper to find the entry with a given key in a hydrated node *) 89 - let find_entry_nonrec node key = 90 - let rec aux entries = 91 + (* returns the index of the first entry in an interspersed list that's gte a given key *) 92 + let find_gte_entry_index entries key : int = 93 + let rec aux entries index = 91 94 match entries with 92 95 | [] -> 93 - None 94 - | e :: es -> 95 - if e.key = key then Some e else if e.key > key then None else aux es 96 + (* will be entries length when not found *) 97 + index 98 + | e :: es -> ( 99 + match e with 100 + | Entry entry when entry.key >= key -> 101 + index 102 + | _ -> 103 + aux es (index + 1) ) 96 104 in 97 - aux node.entries 105 + aux entries 0 98 106 99 - (* from a list of raw entries, produces a list of their keys *) 100 - let entries_to_keys entries = 101 - entries 102 - |> List.fold_left 103 - (fun keys entry -> 104 - let prefix = 105 - match keys with [] -> "" | prev :: _ -> String.sub prev 0 entry.p 106 - in 107 - let path = String.concat "" [prefix; Bytes.to_string entry.k] in 108 - Util.ensure_valid_key path ; path :: keys ) 109 - [] 110 - |> List.rev 107 + (* produces a cid and cbor-encoded bytes for a given tree *) 108 + let serialize node : (Cid.t * bytes) Lwt.t = 109 + let sorted_entries = 110 + List.sort (fun a b -> String.compare a.key b.key) node.entries 111 + in 112 + let rec aux node = 113 + let%lwt left = 114 + node.left 115 + >>? function 116 + | Some l -> 117 + let%lwt cid, _ = aux l in 118 + Lwt.return_some cid 119 + | None -> 120 + Lwt.return_none 121 + in 122 + let last_key = ref "" in 123 + let%lwt mst_entries = 124 + Lwt_list.map_s 125 + (fun entry -> 126 + let%lwt right = 127 + entry.right 128 + >>? function 129 + | Some r -> 130 + let%lwt cid, _ = aux r in 131 + Lwt.return (Some cid) 132 + | None -> 133 + Lwt.return None 134 + in 135 + let prefix_len = Util.shared_prefix_length !last_key entry.key in 136 + last_key := entry.key ; 137 + Lwt.return 138 + { k= 139 + Bytes.of_string 140 + (String.sub entry.key prefix_len 141 + (String.length entry.key - prefix_len) ) 142 + ; p= prefix_len 143 + ; v= entry.value 144 + ; t= right } ) 145 + node.entries 146 + in 147 + let encoded = Dag_cbor.encode (encode_node_raw {l= left; e= mst_entries}) in 148 + let cid = Cid.create Dcbor encoded in 149 + Lwt.return (cid, encoded) 150 + in 151 + aux {node with entries= sorted_entries} 111 152 112 - module Make (Store : Storage.Writable_blockstore) = struct 153 + module Make (Store : Writable_blockstore) = struct 113 154 type bs = Store.t 114 155 115 156 type t = {blockstore: bs; root: Cid.t} ··· 168 209 | _ -> 169 210 raise (Invalid_argument "invalid block") 170 211 212 + (* retrieves a raw node by cid *) 171 213 let retrieve_node_raw t cid : node_raw option Lwt.t = 172 214 match%lwt Store.get_bytes t.blockstore cid with 173 215 | Some bytes -> ··· 175 217 | None -> 176 218 Lwt.return_none 177 219 178 - (* retrieves & decodes a node by cid *) 220 + (* retrieves & hydrates a node by cid *) 179 221 let rec retrieve_node t cid : node option Lwt.t = 180 222 match%lwt retrieve_node_raw t cid with 181 223 | Some raw -> ··· 183 225 | None -> 184 226 Lwt.return_none 185 227 228 + (* lazy version of retrieve_node *) 186 229 and retrieve_node_lazy t cid = lazy (retrieve_node t cid) 187 230 188 231 (* hydrates a raw node *) ··· 261 304 traverse t (fun path cid -> ignore (StringMap.add path cid map)) 262 305 in 263 306 Lwt.return map 264 - 265 - (* produces a cid and cbor-encoded bytes for this mst *) 266 - let serialize t map : (Cid.t * bytes) Lwt.t = 267 - let keys = 268 - map |> StringMap.bindings |> List.map fst |> List.sort String.compare 269 - in 270 - let entry_for_key key = 271 - let value = StringMap.find key map in 272 - let height = Util.leading_zeros_on_hash key in 273 - {layer= height; key; value; right= lazy Lwt.return_none} 274 - in 275 - let root = 276 - { layer= keys |> List.hd |> Util.leading_zeros_on_hash 277 - ; entries= [] 278 - ; left= lazy Lwt.return_none } 279 - in 280 - List.iter 281 - (fun key -> ignore (insert_entry root (entry_for_key key))) 282 - (List.tl keys) ; 283 - let rec finalize node : (Cid.t * bytes) Lwt.t = 284 - let%lwt left = 285 - node.left 286 - >>? function 287 - | Some l -> 288 - let%lwt cid, _ = finalize l in 289 - Lwt.return_some cid 290 - | None -> 291 - Lwt.return_none 292 - in 293 - let last_key = ref "" in 294 - let%lwt mst_entries = 295 - Lwt_list.map_s 296 - (fun entry -> 297 - let%lwt right = 298 - entry.right 299 - >>? function 300 - | Some r -> 301 - let%lwt cid, _ = finalize r in 302 - Lwt.return (Some cid) 303 - | None -> 304 - Lwt.return None 305 - in 306 - let prefix_len = Util.shared_prefix_length !last_key entry.key in 307 - last_key := entry.key ; 308 - Lwt.return 309 - { k= 310 - Bytes.of_string 311 - (String.sub entry.key prefix_len 312 - (String.length entry.key - prefix_len) ) 313 - ; p= prefix_len 314 - ; v= entry.value 315 - ; t= right } ) 316 - node.entries 317 - in 318 - let encoded = 319 - Dag_cbor.encode (encode_node_raw {l= left; e= mst_entries}) 320 - in 321 - let cid = Cid.create Dcbor encoded in 322 - let%lwt () = Store.put_block t.blockstore cid encoded in 323 - Lwt.return (cid, encoded) 324 - in 325 - finalize root 326 307 327 308 (* returns cids and blocks that form the path from a given node to a given entry *) 328 309 let rec path_to_entry t node key : (Cid.t * bytes) list Lwt.t = 329 310 let%lwt root_bytes = Store.get_bytes t.blockstore node in 330 - let%lwt root = 311 + let%lwt root_raw = 331 312 match root_bytes with 332 313 | None -> 333 314 Lwt.return_none 334 315 | Some bytes -> 335 316 Lwt.return_some (decode_block_raw bytes) 336 317 in 337 - let path_tail = [(node, Option.get root_bytes)] in 318 + let%lwt root = 319 + match root_raw with 320 + | None -> 321 + Lwt.return_none 322 + | Some root -> 323 + hydrate_node t root |> Lwt.map Option.some 324 + in 338 325 (* if there is a left child, try to find a path through the left subtree *) 339 326 let%lwt path_through_left = 340 - match root with 327 + match root_raw with 341 328 | None -> 342 329 Lwt.return_some [] 343 - | Some root -> ( 344 - match root.l with 330 + | Some raw -> ( 331 + match raw.l with 345 332 | None -> 346 333 Lwt.return_none 347 334 | Some left -> ( ··· 350 337 Lwt.return_none 351 338 | path -> 352 339 (* Option.get is safe because root is Some only when root_bytes is Some *) 353 - Lwt.return_some (path @ path_tail) ) ) 340 + Lwt.return_some (path @ [(node, Option.get root_bytes)]) ) ) 354 341 in 355 342 match path_through_left with 356 343 | Some path -> 357 344 Lwt.return path 358 345 | None -> ( 359 346 (* if a left subtree path couldn't be found, find the entry whose right subtree this key would belong to *) 360 - let entries = (Option.get root).e in 361 - let entries_keys = entries_to_keys entries in 347 + (* this branch is only reached when root/root_raw/root_bytes are not None; 348 + if they were, path_through_left would be Some [] *) 349 + let entries = (Option.get root).entries in 362 350 let entries_len = List.length entries in 363 351 let entry_index = 364 - match List.find_index (fun e -> e >= key) entries_keys with 352 + match List.find_index (fun e -> e.key >= key) entries with 365 353 | Some index -> 366 354 index 367 355 | None -> 368 356 entries_len 369 357 in 358 + (* path_through_left is None -> root_bytes is Some *) 359 + let path_tail = [(node, Option.get root_bytes)] in 370 360 (* entry_index here is actually the entry to the right of the subtree the key would belong to *) 371 361 match entry_index with 372 362 | _ 373 363 (* because entries[entry_index] might turn out to be the entry we're looking for *) 374 364 when entry_index < entries_len 375 - && List.nth entries_keys entry_index = key -> 365 + && (List.nth entries entry_index).key = key -> 376 366 Lwt.return path_tail 377 367 | _ -> ( 378 368 (* otherwise, we continue down the right subtree of the entry before entry_index *) 379 - match Util.last entries with 380 - | Some last when last.t != None -> 369 + (* path_through_left is None -> root_raw is Some *) 370 + match Util.last (Option.get root_raw).e with 371 + | Some last when last.t <> None -> 381 372 let%lwt path_through_right = 373 + (* when last.t <> None *) 382 374 path_to_entry t (Option.get last.t) key 383 375 in 384 376 Lwt.return (path_through_right @ path_tail) ··· 415 407 let leaves_list = Cid.Set.to_list leaves in 416 408 let%lwt leaves_bm = Store.get_blocks t.blockstore leaves_list in 417 409 if leaves_bm.missing <> [] then failwith "missing mst leaf blocks" ; 418 - let leaves_nodes = Storage.Block_map.entries leaves_bm.blocks in 410 + let leaves_nodes = Block_map.entries leaves_bm.blocks in 419 411 match leaves_nodes with 420 412 | [] -> 421 413 (* with Done, we don't care about the first pair element *) ··· 432 424 (fun (acc, nxt, lvs) cid -> 433 425 let bytes = 434 426 (* we should be safe to do this since we just got the cids from the blockmap *) 435 - Storage.Block_map.get cid bm.blocks |> Option.get 427 + Block_map.get cid bm.blocks |> Option.get 436 428 in 437 429 let node = decode_block_raw bytes in 438 430 let nxt' = ··· 470 462 Lwt.return_some (Obj.magic (), Done) 471 463 in 472 464 Lwt_seq.unfold_lwt step init_state 465 + 466 + (* returns all mst nodes needed to prove the value of a given key *) 467 + let rec proof_for_key t root key : Block_map.t Lwt.t = 468 + let e_rev = List.rev root.entries in 469 + (* iterate in reverse because if the key doesn't exist at this level, 470 + we need to search the "previous" node's right subtree *) 471 + let rec find_proof entries_rev = 472 + match entries_rev with 473 + | [] -> 474 + Lwt.return Block_map.empty 475 + | e :: rest -> ( 476 + if e.key > key then find_proof rest 477 + else if e.key = key then Lwt.return Block_map.empty 478 + else 479 + let*? right = e.right in 480 + match right with 481 + | Some r -> 482 + proof_for_key t r key 483 + | None -> 484 + Lwt.return Block_map.empty ) 485 + in 486 + let%lwt bm = find_proof e_rev in 487 + let%lwt root_cid, root_bytes = serialize root in 488 + Lwt.return (Block_map.set root_cid root_bytes bm) 489 + 490 + (* returns all mst nodes needed to prove the value of a given key's left sibling *) 491 + let rec proof_for_left_sibling t root key : Block_map.t Lwt.t = 492 + let e_rev = List.rev root.entries in 493 + (* iterate in reverse for the same reason as proof_for_key *) 494 + let rec find_proof entries_rev = 495 + match entries_rev with 496 + | [] -> 497 + Lwt.return Block_map.empty 498 + | e :: rest -> ( 499 + if e.key >= key then find_proof rest 500 + else 501 + let*? right = e.right in 502 + match right with 503 + | Some r -> 504 + proof_for_left_sibling t r key 505 + | None -> 506 + Lwt.return Block_map.empty ) 507 + in 508 + let%lwt bm = find_proof e_rev in 509 + let%lwt root_cid, root_bytes = serialize root in 510 + Lwt.return (Block_map.set root_cid root_bytes bm) 511 + 512 + (* returns all mst nodes needed to prove the value of a given key's right sibling *) 513 + let rec proof_for_right_sibling t root key : Block_map.t Lwt.t = 514 + (* unlike the other two, this doesn't iterate in reverse 515 + because we can stop as soon as we're past the key *) 516 + let rec find_proof ?(prev = None) entries = 517 + match entries with 518 + | [] -> ( 519 + (* end of entries, right sibling must be in the last entry's right subtree *) 520 + match prev with 521 + | Some e -> ( 522 + let*? right = e.right in 523 + match right with 524 + | Some r -> 525 + proof_for_right_sibling t r key 526 + | None -> 527 + Lwt.return Block_map.empty ) 528 + | None -> 529 + Lwt.return Block_map.empty ) 530 + | e :: rest -> 531 + if e.key > key then 532 + (* we're past target key; right sibling is in previous entry's right subtree *) 533 + match prev with 534 + | Some p -> ( 535 + let*? right = p.right in 536 + match right with 537 + | Some r -> 538 + proof_for_right_sibling t r key 539 + (* I don't think this should ever happen? *) 540 + | None -> 541 + Lwt.return Block_map.empty ) 542 + (* first entry is already greater than key; we're inside the sibling *) 543 + | None -> 544 + Lwt.return Block_map.empty 545 + else if e.key = key then 546 + (* found the entry, right sibling is in its right subtree *) 547 + let*? right = e.right in 548 + match right with 549 + | Some r -> 550 + proof_for_right_sibling t r key 551 + | None -> 552 + Lwt.return Block_map.empty 553 + else (* e.key < key, keep searching *) 554 + find_proof ~prev:(Some e) rest 555 + in 556 + let%lwt bm = find_proof root.entries in 557 + let%lwt root_cid, root_bytes = serialize root in 558 + Lwt.return (Block_map.set root_cid root_bytes bm) 559 + 560 + (* a covering proof is all mst nodes needed to prove the value of a given leaf 561 + and its siblings to its immediate right and left (if applicable) *) 562 + let get_covering_proof t root key : Block_map.t Lwt.t = 563 + let%lwt proofs = 564 + Lwt.all 565 + [ proof_for_key t root key 566 + ; proof_for_left_sibling t root key 567 + ; proof_for_right_sibling t root key ] 568 + in 569 + Lwt.return 570 + (List.fold_left 571 + (fun acc proof -> Block_map.merge acc proof) 572 + Block_map.empty proofs ) 473 573 end