A sync engine for atproto with an optional baked-in database.
```
# Sync from a CAR file
butterfly sync --input carfile --car file.car --did did:plc:example --store duckdb

# Sync from a PDS
butterfly sync --input pds --pds https://pds.example.com --did did:plc:example

# Subscribe to a relay
butterfly sync --input relay --relay wss://relay.example.com --store stdout

# Discover repositories on a PDS
butterfly discover --input pds --pds https://pds.example.com --limit 50 --format json

# Discover with identity resolution
butterfly discover --input relay --relay wss://relay.example.com --store stdout
```
## WIP notes

- Until the Store interface has stabilized, let's only work on stdout and duckdb to keep things simple.
```
go run ./cmd/butterfly sync -input pds -pds https://morel.us-east.host.bsky.network -did did:plc:ragtjsm2j2vknwkz3zp4oxrd -store duckdb -db ~/tmp/test-tarfiles/duckdb.sql
go run ./cmd/butterfly sync -input carfile -car /Users/paulfrazee/tmp/carfiles/pfrazee.car -did did:plc:ragtjsm2j2vknwkz3zp4oxrd -store duckdb -db ~/tmp/test-tarfiles/duckdb.sql
go run ./cmd/butterfly discover -input pds -pds https://morel.us-east.host.bsky.network -limit 10 -store stdout
```
## TODOs

v1
12351313-- Create first working implementations of all Remote interfaces so that we build familiarity with their semantics and idiosyncrasies
1414-- Implement repo discovery interfaces on Remote
1515-- Implement bidi identity resolution and caching
1616-- Implement a work-scheduler which abstracts Remote, Identity, and Store to backfill & sync using Selectors
1717-- Implement Store querying interfaces; develop indexing strategies
1818-- Create v1 CLI and APIs
- Backfill
  - Implement all Remote FetchRepo
  - Implement all Remote ListRepos
  - Implement bidirectional identity resolution and caching
- Core
  - Implement a work-scheduler which abstracts Remote, Identity, and Store to backfill using Selectors
  - Track "known" Lexicons and maintain an active definitions cache
  - Automatically generate secondary indexes which are generally useful
  - Investigate indexes generated by Lexicon definitions
  - Implement Store querying interfaces
- Active sync
  - Implement all Remote SyncRecords
  - Update sync-state-tracking (repo state)
- Interface
  - Create v1 CLI and APIs

future

+28
cmd/butterfly/store/thing_test.go
···11+package store
22+33+import (
44+ "context"
55+ "fmt"
66+ "testing"
77+ "time"
88+)
99+1010+func TestPebbleThing(t *testing.T) {
1111+ start := time.Now()
1212+ ctx := context.Background()
1313+ s := NewPebbleStore("/Users/paulfrazee/tmp/test-tarfiles/pebble.db/")
1414+ s.Setup(ctx)
1515+ records, _ := s.ListAllRecords(ctx, "did:plc:ragtjsm2j2vknwkz3zp4oxrd", 0)
1616+ collections := 0
1717+ recordCount := 0
1818+ for _, sub := range records {
1919+ collections++
2020+ for range sub {
2121+ recordCount++
2222+ }
2323+ }
2424+ fmt.Printf("%d collections, %d records\n", collections, recordCount)
2525+ s.Close()
2626+ elapsed := time.Since(start)
2727+ fmt.Printf("Completed in %s", elapsed)
2828+}