···3131 [ ("l", match node.l with Some l -> `Link l | None -> `Null)
3232 ; ("e", `Array (Array.of_list (List.map encode_entry_raw node.e))) ] )
33333434+(* decodes a node from cbor bytes *)
3535+let decode_block_raw b : node_raw =
3636+ match Dag_cbor.decode b with
3737+ | `Map node ->
3838+ if not (String_map.mem "e" node) then
3939+ raise (Invalid_argument "mst node missing 'e'") ;
4040+ let l =
4141+ if String_map.mem "l" node then
4242+ match String_map.find "l" node with `Link l -> Some l | _ -> None
4343+ else None
4444+ in
4545+ let e_array =
4646+ match String_map.find "e" node with `Array e -> e | _ -> [||]
4747+ in
4848+ let e =
4949+ Array.to_list
5050+ @@ Array.map
5151+ (fun (entry : Dag_cbor.value) ->
5252+ match entry with
5353+ | `Map entry ->
5454+ { p=
5555+ ( entry |> String_map.find "p"
5656+ |> function
5757+ | `Integer p ->
5858+ Int64.to_int p
5959+ | _ ->
6060+ raise (Invalid_argument "mst entry missing 'p'") )
6161+ ; k=
6262+ ( entry |> String_map.find "k"
6363+ |> function
6464+ | `Bytes k ->
6565+ k
6666+ | _ ->
6767+ raise (Invalid_argument "mst entry missing 'k'") )
6868+ ; v=
6969+ ( entry |> String_map.find "v"
7070+ |> function
7171+ | `Link v ->
7272+ v
7373+ | _ ->
7474+ raise (Invalid_argument "mst entry missing 'v'") )
7575+ ; t=
7676+ ( entry |> String_map.find "t"
7777+ |> function `Link t -> Some t | _ -> None ) }
7878+ | _ ->
7979+ raise (Invalid_argument "non-map mst entry") )
8080+ e_array
8181+ in
8282+ {l; e}
8383+ | _ ->
8484+ raise (Invalid_argument "invalid block")
8585+8686+(* items yielded by ordered stream; either an mst node block or a record cid *)
8787+type ordered_item = Node of Cid.t * bytes | Leaf of Cid.t
8888+3489type node =
3590 { layer: int
3691 ; mutable left: node option Lwt.t Lazy.t
···8413985140 val to_blocks_stream : t -> (Cid.t * bytes) Lwt_seq.t
86141142142+ val to_ordered_stream : t -> ordered_item Lwt_seq.t
143143+87144 val serialize : t -> node -> (Cid.t * bytes, exn) Lwt_result.t
8814589146 val proof_for_key : t -> Cid.t -> string -> Block_map.t Lwt.t
···126183127184 let create blockstore root = {blockstore; root}
128185129129- (* decodes a node retrieved from the blockstore *)
130130- let decode_block_raw b : node_raw =
131131- match Dag_cbor.decode b with
132132- | `Map node ->
133133- if not (String_map.mem "e" node) then
134134- raise (Invalid_argument "mst node missing 'e'") ;
135135- let l =
136136- if String_map.mem "l" node then
137137- match String_map.find "l" node with `Link l -> Some l | _ -> None
138138- else None
139139- in
140140- let e_array =
141141- match String_map.find "e" node with `Array e -> e | _ -> [||]
142142- in
143143- let e =
144144- Array.to_list
145145- @@ Array.map
146146- (fun (entry : Dag_cbor.value) ->
147147- match entry with
148148- | `Map entry ->
149149- { p=
150150- ( entry |> String_map.find "p"
151151- |> function
152152- | `Integer p ->
153153- Int64.to_int p
154154- | _ ->
155155- raise (Invalid_argument "mst entry missing 'p'") )
156156- ; k=
157157- ( entry |> String_map.find "k"
158158- |> function
159159- | `Bytes k ->
160160- k
161161- | _ ->
162162- raise (Invalid_argument "mst entry missing 'k'") )
163163- ; v=
164164- ( entry |> String_map.find "v"
165165- |> function
166166- | `Link v ->
167167- v
168168- | _ ->
169169- raise (Invalid_argument "mst entry missing 'v'") )
170170- ; t=
171171- ( entry |> String_map.find "t"
172172- |> function `Link t -> Some t | _ -> None ) }
173173- | _ ->
174174- raise (Invalid_argument "non-map mst entry") )
175175- e_array
176176- in
177177- {l; e}
178178- | _ ->
179179- raise (Invalid_argument "invalid block")
180180-181186 (* retrieves a raw node by cid *)
182187 let retrieve_node_raw t cid : node_raw option Lwt.t =
183188 match%lwt Store.get_bytes t.blockstore cid with
···281286 in
282287 Lwt.return !map
283288284284- (* returns all mst entries in order for a car stream *)
289289+ (* returns all non-leaf mst node blocks in order for a car stream
290290+ leaf cids can be obtained via collect_nodes_and_leaves or leaves_of_root *)
285291 let to_blocks_stream t : (Cid.t * bytes) Lwt_seq.t =
286286- let module M = struct
287287- type stage =
288288- (* currently walking nodes *)
289289- | Nodes of
290290- { next: Cid.t list (* next cids to fetch *)
291291- ; fetched: (Cid.t * bytes) list (* fetched cids and their bytes *)
292292- ; leaves_seen: Cid.Set.t (* seen leaf cids for dedupe *)
293293- ; leaves_rev: Cid.t list (* reversed encounter order of leaves *) }
294294- (* done walking nodes, streaming accumulated leaves *)
295295- | Leaves of (Cid.t * bytes) list
296296- | Done
297297- end in
298298- let open M in
299299- let init_state =
300300- Nodes
301301- {next= [t.root]; fetched= []; leaves_seen= Cid.Set.empty; leaves_rev= []}
302302- in
303303- let rec step = function
304304- | Done ->
305305- Lwt.return_none
292292+ (* (next cids to fetch list, fetched (cid * bytes) list) *)
293293+ let init_state = ([t.root], []) in
294294+ let rec step (next, fetched) =
295295+ match fetched with
306296 (* node has been fetched, can now be yielded *)
307307- | Nodes ({fetched= (cid, bytes) :: rest; _} as s) ->
308308- Lwt.return_some ((cid, bytes), Nodes {s with fetched= rest})
297297+ | (cid, bytes) :: rest ->
298298+ Lwt.return_some ((cid, bytes), (next, rest))
309299 (* need to fetch next nodes *)
310310- | Nodes {next; fetched= []; leaves_seen; leaves_rev} ->
311311- if List.is_empty next then (
312312- (* finished traversing nodes, time to switch to leaves *)
313313- let leaves_list = List.rev leaves_rev in
314314- let%lwt leaves_bm = Store.get_blocks t.blockstore leaves_list in
315315- if leaves_bm.missing <> [] then failwith "missing mst leaf blocks" ;
316316- let leaves_nodes =
317317- List.map
318318- (fun cid ->
319319- let bytes =
320320- Block_map.get cid leaves_bm.blocks |> Option.get
321321- in
322322- (cid, bytes) )
323323- leaves_list
324324- in
325325- match leaves_nodes with
326326- | [] ->
327327- (* with Done, we don't care about the first pair element *)
328328- Lwt.return_some (Obj.magic (), Done)
329329- | _ ->
330330- (* it's leafin time *)
331331- step (Leaves leaves_nodes) )
300300+ | [] ->
301301+ if List.is_empty next then Lwt.return_none
332302 else
333303 (* go ahead and fetch the next nodes *)
334304 let%lwt bm = Store.get_blocks t.blockstore next in
335305 if bm.missing <> [] then failwith "missing mst nodes" ;
336336- let fetched, next', leaves_seen', leaves_rev' =
306306+ let fetched', next' =
337307 List.fold_left
338338- (fun (acc, nxt, seen, rev) cid ->
308308+ (fun (acc, nxt) cid ->
339309 let bytes =
340310 (* we should be safe to do this since we just got the cids from the blockmap *)
341311 Block_map.get cid bm.blocks |> Option.get
···354324 nxt )
355325 node.e
356326 in
357357- let seen', rev' =
358358- (* add each entry in this node to the seen set and record encounter order *)
359359- List.fold_left
360360- (fun (s, r) e ->
361361- if Cid.Set.mem e.v s then (s, r)
362362- else (Cid.Set.add e.v s, e.v :: r) )
363363- (seen, rev) node.e
364364- in
365365- (* prepending is O(1) per prepend + one O(n) to reverse, vs. O(n) per append = O(n^2) total *)
366366- ((cid, bytes) :: acc, nxt', seen', rev') )
367367- ([], [], leaves_seen, leaves_rev)
368368- next
327327+ (* prepending then reversing is O(2n), appending each time is O(n^2) *)
328328+ ((cid, bytes) :: acc, nxt') )
329329+ ([], []) next
369330 in
370370- step
371371- (Nodes
372372- { next= List.rev next'
373373- ; fetched= List.rev fetched
374374- ; leaves_seen= leaves_seen'
375375- ; leaves_rev= leaves_rev' } )
376376- (* if we're onto yielding leaves, do that *)
377377- | Leaves ((cid, bytes) :: rest) ->
378378- let next = if rest = [] then Done else Leaves rest in
379379- Lwt.return_some ((cid, bytes), next)
380380- (* once we're out of leaves, we're done *)
381381- | Leaves [] ->
382382- Lwt.return_some (Obj.magic (), Done)
331331+ step (List.rev next', List.rev fetched')
383332 in
384333 Lwt_seq.unfold_lwt step init_state
334334+335335+ (* depth-first pre-order as per sync 1.1, yields cid references in place of leaf nodes
336336+ for each node: node block, left subtree, then for each entry: record, right subtree *)
337337+ let to_ordered_stream t : ordered_item Lwt_seq.t =
338338+ (* queue items: `Node cid to visit, `Leaf cid to yield *)
339339+ let rec step queue =
340340+ match queue with
341341+ | [] ->
342342+ Lwt.return_none
343343+ | `Node cid :: rest -> (
344344+ let%lwt bytes_opt = Store.get_bytes t.blockstore cid in
345345+ match bytes_opt with
346346+ | None ->
347347+ step rest
348348+ | Some bytes ->
349349+ let node = decode_block_raw bytes in
350350+ (* queue items: left subtree, then for each entry: record then right subtree *)
351351+ let left_queue =
352352+ match node.l with Some l -> [`Node l] | None -> []
353353+ in
354354+ let entries_queue =
355355+ List.concat_map
356356+ (fun (e : entry_raw) ->
357357+ let right_queue =
358358+ match e.t with Some r -> [`Node r] | None -> []
359359+ in
360360+ `Leaf e.v :: right_queue )
361361+ node.e
362362+ in
363363+ let new_queue = left_queue @ entries_queue @ rest in
364364+ Lwt.return_some ((Node (cid, bytes) : ordered_item), new_queue) )
365365+ | `Leaf cid :: rest ->
366366+ Lwt.return_some ((Leaf cid : ordered_item), rest)
367367+ in
368368+ Lwt_seq.unfold_lwt step [`Node t.root]
385369386370 (* produces a cid and cbor-encoded bytes for a given tree *)
387371 let serialize t node : (Cid.t * bytes, exn) Lwt_result.t =
···620604 in
621605 bfs [t.root] Cid.Set.empty [] Cid.Set.empty
622606623623- (* list of all leaves belonging to a node, ordered by key *)
607607+ (* list of all leaves belonging to a node and its children, ordered by key *)
624608 let rec leaves_of_node n : (string * Cid.t) list Lwt.t =
625609 let%lwt left_leaves =
626610 n.left >>? function Some l -> leaves_of_node l | None -> Lwt.return []
···642626 in
643627 Lwt.return leaves
644628645645- (* little helper *)
629629+ (* list of all leaves in the mst *)
646630 let leaves_of_root t : (string * Cid.t) list Lwt.t =
647631 match%lwt retrieve_node t t.root with
648632 | None ->
+24
pegasus/lib/api/repo/importRepo.ml
···11+type query = {did: string} [@@deriving yojson {strict= false}]
22+33+let rec stream_to_seq stream () =
44+ let%lwt chunk = Dream.read stream in
55+ match chunk with
66+ | None ->
77+ Lwt.return Lwt_seq.Nil
88+ | Some data ->
99+ Lwt.return (Lwt_seq.Cons (Bytes.of_string data, stream_to_seq stream))
1010+1111+let handler =
1212+ Xrpc.handler ~auth:Authorization (fun ctx ->
1313+ let did = Auth.get_authed_did_exn ctx.auth in
1414+ let bytes_stream = Dream.body_stream ctx.req in
1515+ let car_stream = stream_to_seq bytes_stream in
1616+ let%lwt repo =
1717+ Repository.load did ~ds:ctx.db ~ensure_active:true ~write:true
1818+ in
1919+ let%lwt result = Repository.import_car repo car_stream in
2020+ match result with
2121+ | Ok _ ->
2222+ Dream.empty `OK
2323+ | Error e ->
2424+ Errors.internal_error ~msg:(Printexc.to_string e) () )