···6666 client: &PdsClient,
6767 did: &str,
6868 rkey: &str,
6969- project_name: &str,
7069 config: &SyncConfig,
7170) -> Result<Vec<SyncCycleResult>, String> {
7271 let dir = Path::new(&config.dir);
···8079 }
8180 }
82818383- let result = sync_cycle(client, did, rkey, project_name, dir, cycle, config).await?;
8282+ let result = sync_cycle(client, did, rkey, dir, cycle, config).await?;
84838584 if config.verbose {
8685 eprintln!(
···114113 client: &PdsClient,
115114 did: &str,
116115 rkey: &str,
117117- project_name: &str,
118116 dir: &Path,
119117 cycle: u32,
120118 config: &SyncConfig,
···159157 } else {
160158 // Save local changes with filters
161159 let save_result =
162162- save::save_filtered(dir, client, did, rkey, project_name, None, inc, exc, config.verbose).await?;
160160+ save::save_filtered(dir, client, did, rkey, inc, exc, config.verbose).await?;
163161 files_uploaded = save_result.files_uploaded;
164162165163 // Determine if this cycle should materialize
+86-102
src/types.rs
···33use serde::{Deserialize, Serialize};
44use std::collections::{HashMap, HashSet};
/// Collection that holds the project-level repo records.
pub const REPO_COLLECTION: &str = "net.commoninternet.yrsrepo";

/// Collection that holds the per-device branch records.
pub const BRANCH_COLLECTION: &str = "net.commoninternet.yrsbranch";
811912/// Key for the manifest FileEntry in the YrsRepo.
1013pub const MANIFEST_KEY: &str = "pdsyrs_manifest";
···1922 Binary,
2023}
21242222-/// A collaborator reference — points to another device's rkey, optionally on a different PDS.
2323-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
2424-pub struct Collaborator {
2525- /// The rkey of the collaborator's repo record.
2626- pub rkey: String,
2727- /// PDS URL if different from the current PDS (for cross-PDS collaboration).
2828- #[serde(skip_serializing_if = "Option::is_none")]
2929- pub pds: Option<String>,
3030-}
3131-3225/// The file index — maps relative paths to FileEntry metadata.
3326/// Stored as a PDS blob (not inline in the record) to avoid record size limits.
3427pub type FileIndex = HashMap<String, FileEntry>;
35283636-/// A repo stored on PDS with Yrs CRDT state per file.
2929+/// A project-level record stored at `yrsrepo/<project-name>`.
3730///
3838-/// `name` is the project name — shared across all devices/writers for the same project.
3939-/// Each device gets its own rkey (auto-generated), while `name` identifies the project.
4040-/// `collaborators` lists other device rkeys for the same project, enabling merge
4141-/// without needing to list all records.
3131+/// One per project. The rkey IS the project name (deterministic).
3232+/// Lists all branches (devices) that belong to this project.
3333+#[derive(Debug, Clone, Serialize, Deserialize)]
3434+pub struct YrsRepo {
3535+ pub name: String,
3636+ #[serde(default, skip_serializing_if = "Vec::is_empty")]
3737+ pub branches: Vec<BranchRef>,
3838+ #[serde(rename = "updatedAt")]
3939+ pub updated_at: String,
4040+}
4141+4242+/// A reference to a branch within a YrsRepo.
4343+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
4444+pub struct BranchRef {
4545+ /// The rkey of the YrsBranch record.
4646+ pub rkey: String,
4747+ /// Human-readable label (e.g. device name, or the handle).
4848+ #[serde(default, skip_serializing_if = "Option::is_none")]
4949+ pub label: Option<String>,
5050+}
5151+5252+/// A per-device branch record stored at `yrsbranch/<project>-<random8>`.
5353+///
5454+/// Pure data — no back-reference to project name. The YrsRepo record
5555+/// is the only link between branches and their project.
4256///
4357/// File metadata is stored in a separate index blob (pointed to by `index`).
4458/// `blobs` lists all blob refs that must be kept alive to prevent PDS garbage collection.
4559#[derive(Debug, Clone, Serialize, Deserialize)]
4646-pub struct YrsRepo {
4747- pub name: String,
6060+pub struct YrsBranch {
4861 /// Pointer to the index within the PDS blob containing the pack (contains serialized FileIndex).
4962 pub index: PackItemRef,
5050- /// All blob CIDs referenced by this repo — prevents PDS garbage collection.
6363+ /// All blob CIDs referenced by this branch — prevents PDS garbage collection.
5164 pub blobs: Vec<BlobRef>,
5265 #[serde(rename = "updatedAt")]
5366 pub updated_at: String,
5454- #[serde(default, skip_serializing_if = "Vec::is_empty")]
5555- pub collaborators: Vec<Collaborator>,
5667}
57685869/// A single file's state, stored as Yrs CRDT.
···6374 /// For text: hash of UTF-8 content. For binary: hash of raw bytes.
6475 #[serde(rename = "contentHash")]
6576 pub content_hash: String,
6666- /// Blob reference for GC — CID of the PDS blob containing the BaseYrsUpdate.
6767- #[serde(rename = "baseBlob")]
6868- pub base_blob: BlobRef,
6977 /// State vector bytes, base64-encoded for inline storage.
7078 #[serde(rename = "stateVector")]
7179 pub state_vector: String,
···8391 /// PackItemRef to BaseYrsUpdate data within a pack.
8492 #[serde(rename = "base")]
8593 pub base: Option<PackItemRef>,
8686- /// For binary conflict files, the original path before conflict split.
8787- #[serde(rename = "conflictSource", skip_serializing_if = "Option::is_none")]
8888- pub conflict_source: Option<String>,
8994}
909591969292-/// Reference to a pack item within a PDS blob.
9797+/// Reference to a pack item within one or more PDS blobs.
9898+///
9999+/// A pack may be stored as a single PDS blob or split across multiple blobs
100100+/// (when it exceeds the ~50MB ATProto limit). `blobs` always contains the
101101+/// ordered list of PDS blobs — reassemble them to get the full pack data.
93102#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
94103pub struct PackItemRef {
9595- /// The PDS blob containing the pack.
9696- pub blob: BlobRef,
104104+ /// The PDS blob(s) containing the pack. Length 1 for normal packs,
105105+ /// length N for packs split across multiple blobs.
106106+ pub blobs: Vec<BlobRef>,
97107 /// Byte offset within the pack data section.
98108 pub offset: u64,
99109 /// Length of data within the pack data section.
···101111 /// Whether the pack is gzip-compressed.
102112 #[serde(default, skip_serializing_if = "std::ops::Not::not")]
103113 pub compressed: bool,
104104- /// For chunked packs (>40MB), ordered list of chunk blob refs.
105105- /// When present, `blob` is unused — reassemble from chunks instead.
106106- #[serde(skip_serializing_if = "Option::is_none")]
107107- pub chunks: Option<Vec<BlobRef>>,
108114}
109115110116/// A chunked blob — multiple BlobRefs that form a single logical blob.
···162168 };
163169164170 for entry in entries.values() {
165165- add(&entry.base_blob, &mut seen, &mut refs);
166171 if let Some(ref pr) = entry.base {
167167- add(&pr.blob, &mut seen, &mut refs);
168168- if let Some(ref chunks) = pr.chunks {
169169- for chunk in chunks {
170170- add(chunk, &mut seen, &mut refs);
171171- }
172172+ for blob in &pr.blobs {
173173+ add(blob, &mut seen, &mut refs);
172174 }
173175 }
174176 for update in &entry.updates {
175175- add(&update.blob, &mut seen, &mut refs);
176176- if let Some(ref chunks) = update.chunks {
177177- for chunk in chunks {
178178- add(chunk, &mut seen, &mut refs);
179179- }
177177+ for blob in &update.blobs {
178178+ add(blob, &mut seen, &mut refs);
180179 }
181180 }
182181 }
···220219 fn file_entry_serialization() {
221220 let entry = FileEntry {
222221 content_hash: "abc123def456".to_string(),
223223- base_blob: BlobRef::new(
224224- "bafysnap".to_string(),
225225- "application/octet-stream".to_string(),
226226- 100,
227227- ),
228222 state_vector: "AQID".to_string(),
229223 updates: vec![],
230224 updates_count: 0,
231225 base_at: "2026-03-13T00:00:00Z".to_string(),
232226 kind: FileKind::Text,
233227 base: None,
234234- conflict_source: None,
235235- };
228228+ };
236229 let json = serde_json::to_string(&entry).unwrap();
237237- assert!(json.contains("\"baseBlob\""));
238230 assert!(json.contains("\"stateVector\""));
239239- assert!(!json.contains("updatesBlob")); // skipped when None
240231 assert!(json.contains("\"kind\":\"text\"")); // always serialized
241232 let deserialized: FileEntry = serde_json::from_str(&json).unwrap();
242233 assert_eq!(deserialized.content_hash, "abc123def456");
···247238 fn binary_file_entry_serialization() {
248239 let entry = FileEntry {
249240 content_hash: String::new(),
250250- base_blob: BlobRef::new(
251251- "bafybin".to_string(),
252252- "application/octet-stream".to_string(),
253253- 5000,
254254- ),
255241 state_vector: String::new(),
256242 updates: vec![],
257243 updates_count: 0,
258244 base_at: "2026-03-13T00:00:00Z".to_string(),
259245 kind: FileKind::Binary,
260246 base: None,
261261- conflict_source: None,
262262- };
247247+ };
263248 let json = serde_json::to_string(&entry).unwrap();
264249 assert!(json.contains("\"binary\"")); // kind is serialized for binary
265250 let deserialized: FileEntry = serde_json::from_str(&json).unwrap();
#[test]
fn yrs_repo_serialization() {
    // A project record that lists exactly one unlabeled branch.
    let only_branch = BranchRef {
        rkey: "my-site-a1b2c3d4".to_string(),
        label: None,
    };
    let repo = YrsRepo {
        name: "my-site".to_string(),
        branches: vec![only_branch],
        updated_at: "2026-03-13T00:00:00Z".to_string(),
    };

    let encoded = serde_json::to_string(&repo).unwrap();
    // A non-empty branch list must be written out, including the branch rkey.
    assert!(encoded.contains("\"branches\""));
    assert!(encoded.contains("my-site-a1b2c3d4"));

    // Round-trip: the name and the branch list survive deserialization.
    let decoded: YrsRepo = serde_json::from_str(&encoded).unwrap();
    assert_eq!(decoded.name, "my-site");
    assert_eq!(decoded.branches.len(), 1);
}
#[test]
fn yrs_branch_serialization() {
    // One pack blob, used both as the index location and as the keep-alive list.
    let pack_blob = BlobRef::new(
        "bafypack".to_string(),
        "application/octet-stream".to_string(),
        5000,
    );
    let branch = YrsBranch {
        index: PackItemRef {
            blobs: vec![pack_blob.clone()],
            offset: 0,
            length: 200,
            compressed: false,
        },
        blobs: vec![pack_blob],
        updated_at: "2026-03-13T00:00:00Z".to_string(),
    };

    let encoded = serde_json::to_string(&branch).unwrap();
    assert!(encoded.contains("\"index\"")); // index field is written
    assert!(encoded.contains("\"blobs\"")); // blobs field is written

    // Round-trip: the keep-alive blob list survives deserialization.
    let decoded: YrsBranch = serde_json::from_str(&encoded).unwrap();
    assert_eq!(decoded.blobs.len(), 1);
}
299296300297 #[test]
301298 fn collect_blob_refs_deduplicates() {
302299 let blob = BlobRef::new("bafyshared".to_string(), "application/octet-stream".to_string(), 100);
303300 let item_ref = PackItemRef {
304304- blob: blob.clone(), offset: 0, length: 50, compressed: false, chunks: None,
301301+ blobs: vec![blob.clone()], offset: 0, length: 50, compressed: false,
305302 };
306303 let mut index = FileIndex::new();
307304 index.insert("a.txt".to_string(), FileEntry {
308305 content_hash: String::new(),
309309- base_blob: blob.clone(),
310306 state_vector: String::new(),
311307 updates: vec![], updates_count: 0,
312308 base_at: String::new(), kind: FileKind::Text,
313313- base: Some(item_ref.clone()), conflict_source: None,
314314- });
309309+ base: Some(item_ref.clone()), });
315310 index.insert("b.txt".to_string(), FileEntry {
316311 content_hash: String::new(),
317317- base_blob: blob.clone(),
318312 state_vector: String::new(),
319313 updates: vec![], updates_count: 0,
320314 base_at: String::new(), kind: FileKind::Text,
321321- base: Some(item_ref), conflict_source: None,
322322- });
315315+ base: Some(item_ref), });
323316 let refs = collect_blob_refs(&index);
324317 assert_eq!(refs.len(), 1, "same blob CID should be deduplicated");
325318 assert_eq!(refs[0].cid(), "bafyshared");
···332325 let mut index = FileIndex::new();
333326 index.insert("a.txt".to_string(), FileEntry {
334327 content_hash: String::new(),
335335- base_blob: pack_blob.clone(),
336328 state_vector: String::new(),
337329 updates: vec![PackItemRef {
338338- blob: update_blob.clone(), offset: 0, length: 50, compressed: false, chunks: None,
330330+ blobs: vec![update_blob.clone()], offset: 0, length: 50, compressed: false,
339331 }],
340332 updates_count: 1,
341333 base_at: String::new(), kind: FileKind::Text,
342334 base: Some(PackItemRef {
343343- blob: pack_blob.clone(), offset: 0, length: 100, compressed: false, chunks: None,
335335+ blobs: vec![pack_blob.clone()], offset: 0, length: 100, compressed: false,
344336 }),
345345- conflict_source: None,
346346- });
337337+ });
347338 let refs = collect_blob_refs(&index);
348339 assert_eq!(refs.len(), 2);
349340 let cids: Vec<&str> = refs.iter().map(|r| r.cid()).collect();
···354345 #[test]
355346 fn pack_item_ref_serialization() {
356347 let item_ref = PackItemRef {
357357- blob: BlobRef::new(
348348+ blobs: vec![BlobRef::new(
358349 "bafypack".to_string(),
359350 "application/octet-stream".to_string(),
360351 5000,
361361- ),
352352+ )],
362353 offset: 100,
363354 length: 200,
364355 compressed: true,
365365- chunks: None,
366356 };
367357 let json = serde_json::to_string(&item_ref).unwrap();
368358 assert!(json.contains("\"compressed\":true"));
···370360 assert_eq!(deserialized.offset, 100);
371361 assert_eq!(deserialized.length, 200);
372362 assert!(deserialized.compressed);
373373- assert!(deserialized.chunks.is_none());
363363+ assert_eq!(deserialized.blobs.len(), 1);
374364 }
375365376366 #[test]
377367 fn pack_item_ref_compressed_false_omitted() {
378368 // compressed=false should be skipped in serialization
379369 let item_ref = PackItemRef {
380380- blob: BlobRef::new(
370370+ blobs: vec![BlobRef::new(
381371 "bafypack".to_string(),
382372 "application/octet-stream".to_string(),
383373 1000,
384384- ),
374374+ )],
385375 offset: 0,
386376 length: 100,
387377 compressed: false,
388388- chunks: None,
389378 };
390379 let json = serde_json::to_string(&item_ref).unwrap();
391380 assert!(
···395384 }
396385397386 #[test]
398398- fn pack_item_ref_with_chunks() {
387387+ fn pack_item_ref_with_multiple_blobs() {
399388 let item_ref = PackItemRef {
400400- blob: BlobRef::new(
401401- "bafychunk0".to_string(),
402402- "application/octet-stream".to_string(),
403403- 40000000,
404404- ),
405405- offset: 0,
406406- length: 500,
407407- compressed: true,
408408- chunks: Some(vec![
389389+ blobs: vec![
409390 BlobRef::new(
410391 "bafychunk0".to_string(),
411392 "application/octet-stream".to_string(),
···416397 "application/octet-stream".to_string(),
417398 10000000,
418399 ),
419419- ]),
400400+ ],
401401+ offset: 0,
402402+ length: 500,
403403+ compressed: true,
420404 };
421405 let json = serde_json::to_string(&item_ref).unwrap();
422406 assert!(json.contains("bafychunk1"));
423407 let deserialized: PackItemRef = serde_json::from_str(&json).unwrap();
424424- assert_eq!(deserialized.chunks.as_ref().unwrap().len(), 2);
408408+ assert_eq!(deserialized.blobs.len(), 2);
425409 }
426410427411#[test]
+25-44
src/yrs_pds.rs
···77use yrs::{Doc, GetString, ReadTxn, Text, Transact};
8899use crate::pds_client::PdsClient;
1010-use crate::types::{FileEntry, FileIndex, FileKind, YrsRepo};
1010+use crate::types::{FileEntry, FileIndex, FileKind, YrsBranch};
11111212/// Create a Yrs Doc from text content.
1313pub fn doc_from_text(content: &str) -> Doc {
···7878 Ok(())
7979}
80808181-/// Upload a Doc as a FileEntry to PDS.
8282-pub async fn doc_to_file_entry(
8383- doc: &Doc,
8484- client: &PdsClient,
8585- did: &str,
8686-) -> Result<FileEntry, String> {
8787- let base_update = encode_base_update(doc);
8888- let sv = encode_state_vector(doc);
8989-9090- // Upload base update as a PDS blob
9191- let base_blob = client.upload_blob(base_update.clone()).await?;
9292-9393- // We need to reference the blob in a record for it to persist,
9494- // so we return the FileEntry which will be embedded in a YrsRepo.
9595-9696- let now = chrono::Utc::now().to_rfc3339();
9797- let _ = did; // used by caller for the record
9898-9999- Ok(FileEntry {
100100- content_hash: String::new(),
101101- base_blob,
102102- state_vector: base64_encode(&sv),
103103- updates: vec![],
104104- updates_count: 0,
105105- base_at: now,
106106- kind: FileKind::Text,
107107- base: None,
108108- conflict_source: None,
109109- })
110110-}
111111-11281/// Reconstruct a Doc from a FileEntry by downloading blobs from PDS.
11382///
11483/// Downloads the BaseYrsUpdate via base, then applies incremental updates.
···13099}
131100132101/// Extract data from a PackItemRef by downloading and parsing the pack from PDS.
102102+///
103103+/// Downloads all blobs listed in the PackItemRef, reassembles them into
104104+/// the full pack, then extracts the item at the given offset/length.
133105pub async fn fetch_pack_item(
134106 item_ref: &crate::types::PackItemRef,
135107 client: &PdsClient,
136108 did: &str,
137109) -> Result<Vec<u8>, String> {
138138- let pack_data = if let Some(ref chunks) = item_ref.chunks {
139139- let mut chunk_data = Vec::new();
140140- for chunk_ref in chunks {
141141- chunk_data.push(client.get_blob(did, chunk_ref.cid()).await?);
142142- }
143143- crate::pack::reassemble_chunks(&chunk_data)
144144- } else {
145145- client.get_blob(did, item_ref.blob.cid()).await?
146146- };
110110+ let pack_data = download_pack_blobs(&item_ref.blobs, client, did).await?;
147111 let (_, data_section) = crate::pack::parse_pack_auto(&pack_data)?;
148112 let start = item_ref.offset as usize;
149113 let end = start + item_ref.length as usize;
···156120 Ok(data_section[start..end].to_vec())
157121}
158122123123+/// Download and reassemble pack blobs into a single byte vector.
124124+async fn download_pack_blobs(
125125+ blobs: &[crate::types::BlobRef],
126126+ client: &PdsClient,
127127+ did: &str,
128128+) -> Result<Vec<u8>, String> {
129129+ if blobs.len() == 1 {
130130+ client.get_blob(did, blobs[0].cid()).await
131131+ } else {
132132+ let mut chunks = Vec::new();
133133+ for blob in blobs {
134134+ chunks.push(client.get_blob(did, blob.cid()).await?);
135135+ }
136136+ Ok(crate::pack::reassemble_chunks(&chunks))
137137+ }
138138+}
139139+159140/// Get the raw data for a FileEntry's base update, via its base PackItemRef.
160141pub async fn fetch_file_data(
161142 entry: &FileEntry,
···167148 fetch_pack_item(item_ref, client, did).await
168149}
169150170170-/// Load the FileIndex from a YrsRepo's index blob.
151151+/// Load the FileIndex from a YrsBranch's index blob.
171152pub async fn load_file_index(
172172- repo: &YrsRepo,
153153+ branch: &YrsBranch,
173154 client: &PdsClient,
174155 did: &str,
175156) -> Result<FileIndex, String> {
176176- let data = fetch_pack_item(&repo.index, client, did).await?;
157157+ let data = fetch_pack_item(&branch.index, client, did).await?;
177158 serde_json::from_slice(&data).map_err(|e| format!("parse FileIndex: {}", e))
178159}
179160