···2233A sync engine for atproto with an optional baked-in database.
4455+## WIP notes
66+77+- Until the Store interface has stabilized, let's only work on stdout and duckdb to keep things simple.
88+99+## TODOs
1010+1111+v1
1212+1313+- Create first working implementations of all Remote interfaces so that we build familiarity with their semantics and idiosyncrasies
1414+- Implement repo discovery interfaces on Remote
1515+- Implement bidi identity resolution and caching
1616+- Implement a work-scheduler which abstracts Remote, Identity, and Store to backfill & sync using Selectors
1717+- Implement Store querying interfaces; develop indexing strategies
1818+- Create v1 CLI and APIs
1919+2020+future
2121+2222+- A local data read/write model, perhaps modeled as virtual local users
2323+- Prometheus endpoints
2424+525## Example selectors
626727My data:
+1-1
cmd/butterfly/main.go
···7171 defer stream.Close()
72727373 // Process the stream
7474- if err := s.Receive(ctx, stream); err != nil {
7474+ if err := s.BackfillRepo(ctx, *did, stream); err != nil {
7575 logger.Fatalf("failed to process stream: %v", err)
7676 }
7777}
+1-1
cmd/butterfly/remote/carfile.go
···136136137137// SubscribeRecords is not supported for CAR files
138138func (c *CarfileRemote) SubscribeRecords(ctx context.Context, params SubscribeRecordsParams) (*RemoteStream, error) {
139139- return nil, fmt.Errorf("subscribe records: %w", ErrNotImplemented)
139139+ return nil, fmt.Errorf("subscribe records: %w", ErrNotSupported)
140140}
141141142142// readCar reads and validates a CAR file
···2323}
24242525type repoStats struct {
2626- numRecords int
2727- numCommits int
2828- numErrors int
2929- collections map[string]int
2626+ numRecords int
2727+ numCommits int
2828+ numErrors int
2929+ collections map[string]int
3030}
31313232// Setup initializes the store
···4545 return nil
4646}
47474848-// Receive processes events from the stream
4949-func (s *StdoutStore) Receive(ctx context.Context, stream *remote.RemoteStream) error {
4848+// BackfillRepo resets a repo and re-ingests it from a remote stream
4949+func (s *StdoutStore) BackfillRepo(ctx context.Context, did string, stream *remote.RemoteStream) error {
5050+ return s.ActiveSync(ctx, stream)
5151+}
5252+5353+// ActiveSync processes live update events from a remote stream
5454+func (s *StdoutStore) ActiveSync(ctx context.Context, stream *remote.RemoteStream) error {
5055 for event := range stream.Ch {
5156 select {
5257 case <-ctx.Done():
···9499 if stats.numErrors > 0 {
95100 fmt.Printf(" Errors: %d\n", stats.numErrors)
96101 }
9797-102102+98103 if len(stats.collections) > 0 {
99104 fmt.Println(" Collections:")
100105 for col, count := range stats.collections {
+6-2
cmd/butterfly/store/store.go
···1515 // Close tears down the store and releases resources
1616 Close() error
17171818- // Receive processes events from a remote stream
1818+ // BackfillRepo resets a repo and re-ingests it from a remote stream
1919 // The implementation should handle context cancellation appropriately
2020- Receive(ctx context.Context, stream *remote.RemoteStream) error
2020+ BackfillRepo(ctx context.Context, did string, stream *remote.RemoteStream) error
2121+2222+ // ActiveSync processes live update events from a remote stream
2323+ // The implementation should handle context cancellation appropriately
2424+ ActiveSync(ctx context.Context, stream *remote.RemoteStream) error
2125}
22262327// StoreType identifies the type of store
+15-7
cmd/butterfly/store/tarfiles.go
···1818 "github.com/bluesky-social/indigo/cmd/butterfly/remote"
1919)
2121+// NOTE: do not work on this until the Store interface is fully mature
2222+2123// TarfilesStore implements Store by writing repository data to gzipped tar files
2224type TarfilesStore struct {
2325 // The directory to store the .tar.gz files
···34363537// tarWriter manages writing to a single tar file
3638type tarWriter struct {
3737- file *os.File
3939+ file *os.File
3840 gzipWriter *gzip.Writer
3939- writer *tar.Writer
4040- entries map[string]bool // Track existing entries
4141- tempFile string
4242- finalFile string
4141+ writer *tar.Writer
4242+ entries map[string]bool // Track existing entries
4343+ tempFile string
4444+ finalFile string
4345}
44464547// NewTarfilesStore creates a new TarfilesStore
···8991 return nil
9092}
91939292-// Receive processes events from the stream
9393-func (t *TarfilesStore) Receive(ctx context.Context, stream *remote.RemoteStream) error {
9494+// BackfillRepo resets a repo and re-ingests it from a remote stream
9595+func (t *TarfilesStore) BackfillRepo(ctx context.Context, did string, stream *remote.RemoteStream) error {
9696+ // TODO For now, it's fine to just reuse ActiveSync. A more optimized variant could be useful.
9797+ return t.ActiveSync(ctx, stream)
9898+}
9999+100100+// ActiveSync processes live update events from a remote stream
101101+func (t *TarfilesStore) ActiveSync(ctx context.Context, stream *remote.RemoteStream) error {
94102 for event := range stream.Ch {
95103 select {
96104 case <-ctx.Done():
+6-6
cmd/butterfly/store/tarfiles_test.go
···114114 }()
115115116116 // Process the stream
117117- err = store.Receive(ctx, stream)
117117+ err = store.ActiveSync(ctx, stream)
118118 require.NoError(t, err)
119119120120 // Close the store to finalize tar files
···187187 }()
188188189189 // Process the stream
190190- err = store.Receive(ctx, stream)
190190+ err = store.ActiveSync(ctx, stream)
191191 require.NoError(t, err)
192192193193 // Close the store
···241241 }()
242242243243 // Process should stop when context is cancelled
244244- err = store.Receive(ctx, stream)
244244+ err = store.ActiveSync(ctx, stream)
245245 assert.ErrorIs(t, err, context.Canceled)
246246}
247247···273273 }
274274 close(stream.Ch)
275275276276- err = store.Receive(ctx, stream)
276276+ err = store.ActiveSync(ctx, stream)
277277 require.NoError(t, err)
278278 err = store.Close()
279279 require.NoError(t, err)
···303303 }
304304 close(stream.Ch)
305305306306- err = store.Receive(ctx, stream)
306306+ err = store.ActiveSync(ctx, stream)
307307 require.NoError(t, err)
308308 err = store.Close()
309309 require.NoError(t, err)
···381381 }()
382382383383 // Should process without error, skipping invalid events
384384- err = store.Receive(ctx, stream)
384384+ err = store.ActiveSync(ctx, stream)
385385 require.NoError(t, err)
386386387387 err = store.Close()