···11+MIT License
22+33+Copyright (c) 2025 Thomas Gazagnaire
44+55+Permission is hereby granted, free of charge, to any person obtaining a copy
66+of this software and associated documentation files (the "Software"), to deal
77+in the Software without restriction, including without limitation the rights
88+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
99+copies of the Software, and to permit persons to whom the Software is
1010+furnished to do so, subject to the following conditions:
1111+1212+The above copyright notice and this permission notice shall be included in all
1313+copies or substantial portions of the Software.
1414+1515+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1616+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1717+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1818+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1919+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121+SOFTWARE.
+102
README.md
···11+# pds
22+33+ATProto Personal Data Server storage for OCaml.
44+55+## Overview
66+77+A library for reading and writing ATProto PDS (Personal Data Server) storage
88+format. This enables:
99+1010+- Local PDS-compatible repositories without running a full PDS
1111+- Offline repository manipulation (backup, migration, inspection)
1212+- CAR import/export for interoperability
1313+1414+## Storage Layout

```
<repo>/
├── pds.db              # SQLite database
│   ├── blocks table    # CID → DAG-CBOR bytes
│   ├── refs table      # name → CID (branches)
│   └── meta table      # did, version, etc.
└── blobs/              # Large binary data
    ├── ba/             # First 2 chars of CID
    │   └── bafyrei...  # Full CID as filename
    └── ...
```
2727+2828+## Installation
2929+3030+```
3131+opam install pds
3232+```
3333+3434+## Usage
3535+3636+```ocaml
3737+(* Create a new repository *)
3838+let repo = Pds.create (Eio.Path.(fs / "my-repo")) ~did:(Atp.Did.of_string_exn "did:web:example.com") in
3939+4040+(* Store records *)
4141+Pds.put repo ~collection:"app.bsky.feed.post" ~rkey:"abc123" record_bytes;
4242+4343+(* Read records *)
4444+let data = Pds.get repo ~collection:"app.bsky.feed.post" ~rkey:"abc123" in
4545+4646+(* List collection *)
4747+let records = Pds.list repo ~collection:"app.bsky.feed.post" in
4848+4949+(* Store blobs *)
5050+let blob_ref = Pds.put_blob repo ~mime_type:"image/png" image_bytes in
5151+5252+(* Export as CAR *)
5353+let car_data = Pds.export_car repo in
5454+5555+(* Import from CAR *)
5656+let count = Pds.import_car repo car_data in
5757+5858+(* Close *)
5959+Pds.close repo
6060+```
6161+6262+## API
6363+6464+### Repository
6565+6666+- `Pds.create path ~did` - Create a new repository
6767+- `Pds.open_ path` - Open an existing repository
6868+- `Pds.did t` - Get the repository's DID
6969+- `Pds.close t` - Close the repository
7070+7171+### Records
7272+7373+- `Pds.get t ~collection ~rkey` - Read a record
7474+- `Pds.put t ~collection ~rkey data` - Write a record
7575+- `Pds.delete t ~collection ~rkey` - Delete a record
7676+- `Pds.list t ~collection` - List records in a collection
7777+7878+### Blobs
7979+8080+- `Pds.put_blob t ~mime_type data` - Store a blob
8181+- `Pds.get_blob t cid` - Read a blob
8282+8383+### Commits
8484+8585+- `Pds.head t` - Get current commit CID
8686+- `Pds.checkout t` - Get MST at HEAD
- `Pds.commit t ~tree ~message ~signing_key` - Create a signed commit (note: not exposed by the current `pds.mli`)
8888+8989+### Import/Export
9090+9191+- `Pds.import_car t data` - Import blocks from CAR
9292+- `Pds.export_car t` - Export repository as CAR
9393+9494+## Related Work
9595+9696+- [ocaml-atp](https://tangled.org/anil.recoil.org/ocaml-atp) - ATProto primitives (MST, CID, DAG-CBOR, CAR)
9797+- [ocaml-sqlite](https://tangled.org/gazagnaire.org/ocaml-sqlite) - SQLite key-value store (used internally)
9898+- [Bluesky PDS](https://github.com/bluesky-social/pds) - Reference TypeScript implementation
9999+100100+## License
101101+102102+MIT License. See [LICENSE.md](LICENSE.md) for details.
+23
dune-project
···11+(lang dune 3.0)
22+33+(name pds)
44+55+(generate_opam_files true)
66+77+(license MIT)
88+(authors "Thomas Gazagnaire")
99+(maintainers "Thomas Gazagnaire")
1010+(source (uri https://tangled.org/gazagnaire.org/ocaml-pds))
1111+1212+(package
1313+ (name pds)
1414+ (synopsis "ATProto Personal Data Server storage for OCaml")
1515+ (description
1616+ "A library for reading and writing ATProto PDS (Personal Data Server) storage format. Enables local PDS-compatible repositories, offline repository manipulation, and CAR import/export.")
1717+ (depends
1818+ (ocaml (>= 5.1))
1919+ (eio (>= 1.0))
2020+ (atp (>= 0.1))
2121+ (sqlite (>= 0.1))
2222+ (alcotest :with-test)
2323+ (eio_main :with-test)))
+59
lib/blob_store.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Thomas Gazagnaire. All rights reserved.
33+ SPDX-License-Identifier: MIT
44+ ---------------------------------------------------------------------------*)

(** Filesystem-based blob storage with CID-based paths.

    Blobs are stored in a directory structure organized by CID prefix: each
    blob lives in a sub-directory named after the first two characters of its
    CID string, and the file itself is named after the full CID string.
    {[
      blobs/
      └── ba/
          ├── bafyrei...
          └── bafybei...
    ]} *)

(** A blob store rooted at the directory [dir]. *)
type t = { dir : Eio.Fs.dir_ty Eio.Path.t }

(** [create dir] returns a blob store rooted at [dir].

    The directory (and any missing parents) is created on a best-effort
    basis: an [Eio.Io] failure from directory creation is ignored, as in the
    original behaviour. Only [Eio.Io] is caught, so fiber cancellation and
    other non-I/O exceptions are no longer silently swallowed. *)
let create dir =
  (try Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 dir with Eio.Io _ -> ());
  { dir }

(** [blob_path t cid] is the on-disk location of the blob identified by [cid]:
    [<dir>/<shard>/<name>], where [name] is the CID rendered as a string and
    [shard] is its first two characters (or the whole string if shorter). *)
let blob_path t cid =
  let name = Atp.Cid.to_string cid in
  let shard_len = Int.min 2 (String.length name) in
  let shard = String.sub name 0 shard_len in
  Eio.Path.(t.dir / shard / name)

(** [put t ~mime_type data] writes [data] to the store and returns a blob
    reference carrying its CID, [mime_type] and size in bytes.

    The CID is computed from [data] with the [`Raw] codec, so the target path
    depends only on the content; an existing file at that path is truncated
    and rewritten.

    Creation of the shard directory is best-effort: an [Eio.Io] failure there
    is ignored and the subsequent write reports the real error. Only [Eio.Io]
    is caught, so cancellation and non-I/O exceptions propagate.

    @raise Eio.Io if the blob file cannot be written. *)
let put t ~mime_type data =
  let cid = Atp.Cid.create `Raw data in
  let path = blob_path t cid in
  (* Make sure the shard directory exists before writing into it. *)
  (match Eio.Path.split path with
   | None -> ()
   | Some (parent, _basename) ->
     (try Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 parent
      with Eio.Io _ -> ()));
  Eio.Path.save ~create:(`Or_truncate 0o644) path data;
  let size = Int64.of_int (String.length data) in
  { Atp.Blob_ref.cid; mime_type; size }

(** [get t cid] loads the contents of the blob stored for [cid], or returns
    [None] when the file does not exist. Other I/O errors propagate. *)
let get t cid =
  match Eio.Path.load (blob_path t cid) with
  | contents -> Some contents
  | exception Eio.Io (Eio.Fs.E (Eio.Fs.Not_found _), _) -> None

(** [delete t cid] removes the blob file for [cid]. Deleting a blob that is
    not present is a no-op; other I/O errors propagate. *)
let delete t cid =
  match Eio.Path.unlink (blob_path t cid) with
  | () -> ()
  | exception Eio.Io (Eio.Fs.E (Eio.Fs.Not_found _), _) -> ()

(** [mem t cid] is [true] iff the path for [cid] resolves (following
    symlinks) to a regular file. Any other kind of entry, or a missing
    entry, yields [false]. *)
let mem t cid =
  let target = blob_path t cid in
  try
    match Eio.Path.kind ~follow:true target with
    | `Regular_file -> true
    | _ -> false
  with Eio.Io (Eio.Fs.E (Eio.Fs.Not_found _), _) -> false
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Thomas Gazagnaire. All rights reserved.
33+ SPDX-License-Identifier: MIT
44+ ---------------------------------------------------------------------------*)

(** Handle on an open PDS repository.

    All fields are set once by [create] / [open_] and never reassigned, so
    the record is fully immutable ([did] was previously declared [mutable]
    although nothing in this module mutates it). *)
type t = {
  path : Eio.Fs.dir_ty Eio.Path.t;  (** Repository root directory. *)
  db : Sqlite.t;  (** Database handle opened on [<path>/pds.db]. *)
  blocks : Sqlite.Table.t;  (** The ["blocks"] table. *)
  refs : Sqlite.Table.t;  (** The ["refs"] table (ref name to CID string). *)
  meta : Sqlite.Table.t;  (** The ["meta"] table (["did"], ["version"]). *)
  blobs : Blob_store.t;  (** Blob store rooted at [<path>/blobs]. *)
  blockstore : Atp.Blockstore.writable;
      (** Blockstore view over the [blocks] table. *)
  did : Atp.Did.t;  (** DID recorded in the ["meta"] table. *)
}
1616+1717+(* Repository layout:
1818+ <path>/
1919+ ├── pds.db # SQLite database (blocks, refs, meta tables)
2020+ └── blobs/ # Filesystem blob storage
2121+*)

(** [db_path root] is the SQLite database file inside the repository
    directory [root], i.e. [root/pds.db]. *)
let db_path root = Eio.Path.( / ) root "pds.db"

(** [blobs_path root] is the blob directory inside the repository directory
    [root], i.e. [root/blobs]. *)
let blobs_path root = Eio.Path.( / ) root "blobs"

(** [create path ~did] initialises a repository at [path] and returns a
    handle on it.

    It creates the directory on a best-effort basis, opens [pds.db], creates
    the ["blocks"], ["refs"] and ["meta"] tables, sets up the blob store and
    records [did] and a ["version"] of ["1"] in the ["meta"] table.

    Only [Eio.Io] failures from directory creation are ignored (the database
    open that follows reports the real problem); cancellation and other
    exceptions propagate. If any step after the database is opened raises,
    the database handle is closed before the exception is re-raised so it is
    not leaked. *)
let create path ~did =
  (try Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 path with Eio.Io _ -> ());
  let db = Sqlite.create (db_path path) in
  let init () =
    let blocks = Sqlite.Table.create db ~name:"blocks" in
    let refs = Sqlite.Table.create db ~name:"refs" in
    let meta = Sqlite.Table.create db ~name:"meta" in
    let blobs = Blob_store.create (blobs_path path) in
    let blockstore = Sqlite_blockstore.create blocks in
    (* Persist the identity and layout version alongside the data. *)
    Sqlite.Table.put meta "did" (Atp.Did.to_string did);
    Sqlite.Table.put meta "version" "1";
    { path; db; blocks; refs; meta; blobs; blockstore; did }
  in
  match init () with
  | repo -> repo
  | exception exn ->
    let bt = Printexc.get_raw_backtrace () in
    (* Errors from the cleanup close are ignored so that the original
       failure is the one reported to the caller. *)
    (try Sqlite.close db with _ -> ());
    Printexc.raise_with_backtrace exn bt

(** [open_ path] opens the existing repository stored at [path].

    The database file [path/pds.db] must already exist as a regular file.
    The DID is read back from the ["did"] entry of the ["meta"] table.

    If reading the metadata (or any other step after the database has been
    opened) raises, the database handle is closed before the exception is
    re-raised, so a failed [open_] does not leak it.

    @raise Failure
      with ["PDS database not found"] if [pds.db] is missing or not a regular
      file, or ["PDS metadata missing DID"] if the ["did"] entry is absent.
      Exceptions raised by [Atp.Did.of_string_exn] on a malformed stored DID
      propagate unchanged. *)
let open_ path =
  let db_file = db_path path in
  (* Refuse to create a fresh database implicitly: it must already exist. *)
  (match Eio.Path.kind ~follow:true db_file with
   | `Regular_file -> ()
   | _ -> failwith "PDS database not found"
   | exception Eio.Io (Eio.Fs.E (Eio.Fs.Not_found _), _) ->
     failwith "PDS database not found");
  let db = Sqlite.create db_file in
  let init () =
    let blocks = Sqlite.Table.create db ~name:"blocks" in
    let refs = Sqlite.Table.create db ~name:"refs" in
    let meta = Sqlite.Table.create db ~name:"meta" in
    let blobs = Blob_store.create (blobs_path path) in
    let blockstore = Sqlite_blockstore.create blocks in
    let did =
      match Sqlite.Table.get meta "did" with
      | Some s -> Atp.Did.of_string_exn s
      | None -> failwith "PDS metadata missing DID"
    in
    { path; db; blocks; refs; meta; blobs; blockstore; did }
  in
  match init () with
  | repo -> repo
  | exception exn ->
    let bt = Printexc.get_raw_backtrace () in
    (* Errors from the cleanup close are ignored so that the original
       failure is the one reported to the caller. *)
    (try Sqlite.close db with _ -> ());
    Printexc.raise_with_backtrace exn bt

(** [did t] is the DID the repository was created or opened with. *)
let did { did; _ } = did

(** [close t] closes the underlying SQLite database handle. *)
let close { db; _ } = Sqlite.close db

(** [blockstore t] is the blockstore backed by the ["blocks"] table. *)
let blockstore { blockstore; _ } = blockstore
6565+6666+(* Refs *)

(** [get_ref t name] looks [name] up in the ["refs"] table and parses the
    stored string as a CID; [None] if there is no such ref. *)
let get_ref t name =
  match Sqlite.Table.get t.refs name with
  | None -> None
  | Some cid_str -> Some (Atp.Cid.of_string cid_str)

(** [set_ref t name cid] stores the string form of [cid] under [name] in the
    ["refs"] table. *)
let set_ref t name cid =
  let cid_str = Atp.Cid.to_string cid in
  Sqlite.Table.put t.refs name cid_str

(** [delete_ref t name] removes [name] from the ["refs"] table. *)
let delete_ref { refs; _ } name = Sqlite.Table.delete refs name

(** [list_refs t] returns every [(name, cid)] pair of the ["refs"] table, in
    the order in which [Sqlite.Table.iter] visits them. *)
let list_refs t =
  let acc = ref [] in
  let collect name cid_str =
    acc := (name, Atp.Cid.of_string cid_str) :: !acc
  in
  Sqlite.Table.iter t.refs ~f:collect;
  (* Entries were consed in reverse visiting order. *)
  List.rev !acc
8181+8282+(* HEAD is a special ref *)

(* Name of the ref that tracks the current state of the repository. *)
let head_ref_name = "head"

(** [head t] is the CID stored under the ["head"] ref, if any. *)
let head t = get_ref t head_ref_name

(** [set_head t cid] points the ["head"] ref at [cid]. *)
let set_head t cid = set_ref t head_ref_name cid
8686+8787+(* MST operations *)

(** [checkout t] builds an MST handle from the CID stored in the ["head"]
    ref, backed by the repository's blockstore; [None] when no head is set. *)
let checkout t =
  Option.map
    (fun root -> Atp.Mst.of_cid root ~store:t.blockstore)
    (head t)
9393+9494+(* Record key format: collection/rkey *)

(** [record_key ~collection ~rkey] is the MST key of a record:
    [collection], a ['/'] separator, then [rkey]. *)
let record_key ~collection ~rkey = String.concat "/" [ collection; rkey ]

(** [parse_record_key key] splits [key] at its first ['/'] into
    [(collection, rkey)]; [None] if [key] contains no ['/']. Any further
    ['/'] characters stay in the [rkey] part.

    NOTE(review): not referenced by any other definition visible in this
    file. *)
let parse_record_key key =
  String.index_opt key '/'
  |> Option.map (fun slash ->
         let rkey_start = slash + 1 in
         let collection = String.sub key 0 slash in
         let rkey = String.sub key rkey_start (String.length key - rkey_start) in
         (collection, rkey))
105105+106106+(* Records *)

(** [get t ~collection ~rkey] returns the bytes of the record stored under
    [collection/rkey].

    The result is [None] when the repository has no head, when the key is
    absent from the MST, or when the blockstore has no block for the CID the
    MST maps the key to. *)
let get t ~collection ~rkey =
  Option.bind (checkout t) (fun mst ->
      let key = record_key ~collection ~rkey in
      Option.bind
        (Atp.Mst.get key mst ~store:t.blockstore)
        (fun cid -> t.blockstore#get cid))

(** [put t ~collection ~rkey data] stores a record and advances the head.

    Steps, in order: [data] is written to the blockstore under a CID
    computed with the [`Dag_cbor] codec; the key [collection/rkey] is mapped
    to that CID in the MST at head (or in an empty MST when there is no head
    yet); the ["head"] ref is set to the CID of the resulting MST. *)
let put t ~collection ~rkey data =
  let store = t.blockstore in
  (* Persist the record block first so the MST never points at nothing. *)
  let record_cid = Atp.Cid.create `Dag_cbor data in
  store#put record_cid data;
  let current = Option.value (checkout t) ~default:Atp.Mst.empty in
  let key = record_key ~collection ~rkey in
  let updated = Atp.Mst.add key record_cid current ~store in
  set_head t (Atp.Mst.to_cid updated ~store)

(** [delete t ~collection ~rkey] removes the key [collection/rkey] from the
    MST at head and points the ["head"] ref at the resulting MST.

    Does nothing when the repository has no head. The record's block is not
    deleted from the blockstore. *)
let delete t ~collection ~rkey =
  let store = t.blockstore in
  Option.iter
    (fun mst ->
      let key = record_key ~collection ~rkey in
      let pruned = Atp.Mst.remove key mst ~store in
      set_head t (Atp.Mst.to_cid pruned ~store))
    (checkout t)

(** [list t ~collection] returns [(rkey, cid)] for every MST leaf whose key
    starts with [collection ^ "/"], with that prefix stripped from the key.
    Returns [[]] when the repository has no head.

    Every leaf of the MST is visited; results keep the order produced by
    [Atp.Mst.leaves]. *)
let list t ~collection =
  let prefix = collection ^ "/" in
  let skip = String.length prefix in
  (* [Some rkey] when [key] belongs to the collection, [None] otherwise. *)
  let rkey_of key =
    if String.starts_with ~prefix key then
      Some (String.sub key skip (String.length key - skip))
    else None
  in
  match checkout t with
  | None -> []
  | Some mst ->
    Atp.Mst.leaves mst ~store:t.blockstore
    |> Seq.filter_map (fun (key, cid) ->
           Option.map (fun rkey -> (rkey, cid)) (rkey_of key))
    |> List.of_seq
156156+157157+(* Blobs *)

(** [put_blob t ~mime_type data] stores [data] in the repository's blob
    store and returns the resulting blob reference. *)
let put_blob { blobs; _ } ~mime_type data = Blob_store.put blobs ~mime_type data

(** [get_blob t cid] reads the blob for [cid] from the repository's blob
    store, if present. *)
let get_blob { blobs; _ } cid = Blob_store.get blobs cid
161161+162162+(* CAR Import/Export *)

(** [import_car t car_data] parses [car_data] as a CAR stream and writes
    every block it yields into the blockstore. Returns the number of blocks
    written. The CAR header is ignored and the ["head"] ref is left
    untouched. *)
let import_car t car_data =
  let reader = Bytesrw.Bytes.Reader.of_string car_data in
  let _header, blocks = Atp.Car.read ~cid_format:`Atproto reader in
  Seq.fold_left
    (fun imported (cid, data) ->
      t.blockstore#put cid data;
      imported + 1)
    0 blocks

(** [export_car t] serialises the repository as a version-1 CAR string.

    With no head, the result is a CAR with no roots and no blocks. Otherwise
    the single root is the CID stored in the ["head"] ref, and the blocks
    are those produced by [Atp.Mst.to_blocks] for the MST loaded from that
    CID.

    The head is read once and the MST is loaded from that same CID. The
    previous version re-read the head through [checkout], which made a
    second lookup and carried a branch that could never be taken. *)
let export_car t =
  let store = t.blockstore in
  let roots, blocks =
    match head t with
    | None -> ([], Seq.empty)
    | Some root_cid ->
      let mst = Atp.Mst.of_cid root_cid ~store in
      ([ root_cid ], Atp.Mst.to_blocks mst ~store)
  in
  Atp.Car.to_string ~cid_format:`Atproto
    { Atp.Car.version = 1; roots }
    blocks
+103
lib/pds.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Thomas Gazagnaire. All rights reserved.
33+ SPDX-License-Identifier: MIT
44+ ---------------------------------------------------------------------------*)
55+66+(** ATProto Personal Data Server storage.
77+88+ A library for reading and writing ATProto PDS (Personal Data Server)
99+ storage format. Enables local PDS-compatible repositories, offline
1010+ repository manipulation, and CAR import/export. *)
1111+1212+(** {1 Repository} *)
1313+1414+type t
1515+(** A PDS repository. *)
1616+1717+val create : Eio.Fs.dir_ty Eio.Path.t -> did:Atp.Did.t -> t
1818+(** [create path ~did] creates a new repository at [path] for the given DID.
1919+ The directory will be created if it doesn't exist. *)
2020+2121+val open_ : Eio.Fs.dir_ty Eio.Path.t -> t
2222+(** [open_ path] opens an existing repository at [path].
2323+ @raise Failure if the repository doesn't exist or is invalid. *)
2424+2525+val did : t -> Atp.Did.t
2626+(** [did t] returns the repository's DID. *)
2727+2828+val close : t -> unit
2929+(** [close t] closes the repository and releases resources. *)
3030+3131+(** {1 Blockstore} *)
3232+3333+val blockstore : t -> Atp.Blockstore.writable
3434+(** [blockstore t] returns the underlying blockstore (SQLite-backed).
3535+ Use this for direct block operations. *)
3636+3737+(** {1 Commits} *)
3838+3939+val head : t -> Atp.Cid.t option
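(** [head t] returns the CID stored in the ["head"] ref, or [None] if the
    repository is empty (nothing has been written yet). {!put} and {!delete}
    store the CID of the updated MST root in this ref, and {!checkout} loads
    it as an MST root. *)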
4242+4343+val set_head : t -> Atp.Cid.t -> unit
4444+(** [set_head t cid] sets the HEAD reference to [cid]. *)
4545+4646+val checkout : t -> Atp.Mst.node option
4747+(** [checkout t] returns the MST root at HEAD, or [None] if empty. *)
4848+4949+(** {1 Records}
5050+5151+ Records are stored in the MST with keys of the form
5252+ [collection/rkey], e.g., ["app.bsky.feed.post/abc123"]. *)
5353+5454+val get : t -> collection:string -> rkey:string -> string option
5555+(** [get t ~collection ~rkey] reads a record's DAG-CBOR bytes. *)
5656+5757+val put : t -> collection:string -> rkey:string -> string -> unit
5858+(** [put t ~collection ~rkey data] writes a record (DAG-CBOR bytes).
5959+ Creates a new block and updates the MST. *)
6060+6161+val delete : t -> collection:string -> rkey:string -> unit
6262+(** [delete t ~collection ~rkey] removes a record from the MST. *)
6363+6464+val list : t -> collection:string -> (string * Atp.Cid.t) list
6565+(** [list t ~collection] returns all record keys and CIDs in a collection. *)
6666+6767+(** {1 Blobs}
6868+6969+ Blobs are stored in a separate filesystem directory, organized by
7070+ CID prefix for efficient access. *)
7171+7272+val put_blob : t -> mime_type:string -> string -> Atp.Blob_ref.t
7373+(** [put_blob t ~mime_type data] stores a blob and returns its reference.
7474+ The blob is content-addressed by its CID. *)
7575+7676+val get_blob : t -> Atp.Cid.t -> string option
7777+(** [get_blob t cid] reads blob data by CID. *)
7878+7979+(** {1 Import/Export} *)
8080+8181+val import_car : t -> string -> int
8282+(** [import_car t car_data] imports blocks from CAR data.
8383+ Returns the number of blocks imported. Does not update HEAD. *)
8484+8585+val export_car : t -> string
8686+(** [export_car t] exports the repository as CAR data.
8787+ Includes all blocks reachable from HEAD. *)
8888+8989+(** {1 Refs}
9090+9191+ Named references (like git branches). The default ref is ["head"]. *)
9292+9393+val get_ref : t -> string -> Atp.Cid.t option
9494+(** [get_ref t name] returns the CID for ref [name]. *)
9595+9696+val set_ref : t -> string -> Atp.Cid.t -> unit
9797+(** [set_ref t name cid] sets ref [name] to [cid]. *)
9898+9999+val delete_ref : t -> string -> unit
100100+(** [delete_ref t name] removes ref [name]. *)
101101+102102+val list_refs : t -> (string * Atp.Cid.t) list
103103+(** [list_refs t] returns all refs. *)
+49
lib/sqlite_blockstore.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Thomas Gazagnaire. All rights reserved.
33+ SPDX-License-Identifier: MIT
44+ ---------------------------------------------------------------------------*)
55+66+(** SQLite-backed blockstore implementing [Atp.Blockstore.writable]. *)

(** Blockstore object over a single SQLite table: each block is stored with
    the string form of its CID as key and its bytes as value. *)
class sqlite_store (blocks : Sqlite.Table.t) =
  (* Table key used for a given CID. *)
  let key_of cid = Atp.Cid.to_string cid in
  object (self)
    (** Bytes stored for [cid], if any. *)
    method get cid = Sqlite.Table.get blocks (key_of cid)

    (** Like [get], but raises via [Atp.Blockstore.raise_error] with
        [`Block_not_found cid] when the block is absent. *)
    method get_exn cid =
      match self#get cid with
      | None -> Atp.Blockstore.raise_error (`Block_not_found cid)
      | Some data -> data

    (** Whether the table has an entry for [cid]. *)
    method has cid = Sqlite.Table.mem blocks (key_of cid)

    (** Looks up each CID in turn; found blocks go into the resulting block
        map, absent CIDs are listed in [missing] in input order. *)
    method get_many cids =
      let step (found, missing_rev) cid =
        match self#get cid with
        | None -> (found, cid :: missing_rev)
        | Some data -> (Atp.Block_map.set cid data found, missing_rev)
      in
      let found, missing_rev =
        List.fold_left step (Atp.Block_map.empty, []) cids
      in
      { Atp.Block_map.blocks = found; missing = List.rev missing_rev }

    (** Stores [data] under [cid]. *)
    method put cid data = Sqlite.Table.put blocks (key_of cid) data

    (** Stores every entry of [new_blocks], one [put] per entry. *)
    method put_many new_blocks =
      Atp.Block_map.iter (fun cid data -> self#put cid data) new_blocks

    (** Removes the entry for [cid]. *)
    method delete cid = Sqlite.Table.delete blocks (key_of cid)

    (** Removes the entries for all of [cids], one [delete] per CID. *)
    method delete_many cids = List.iter (fun cid -> self#delete cid) cids

    (** No-op in this implementation. *)
    method sync = ()
  end

(** [create blocks] wraps the table [blocks] in a {!sqlite_store} and exposes
    it at the [Atp.Blockstore.writable] type. *)
let create blocks =
  let store = new sqlite_store blocks in
  (store :> Atp.Blockstore.writable)