this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Cleanup

Paul Frazee afdf0386 ec89a9c5

+457 -158
+8 -4
cmd/butterfly/identity.go
··· 1 - /* 2 - Identity resolution infra 3 - */ 1 + // Package main provides identity resolution infrastructure for Butterfly 2 + package main 4 3 5 - package main 4 + // TODO: Implement identity resolution functionality 5 + // This will likely include: 6 + // - DID resolution 7 + // - Handle resolution 8 + // - Identity caching 9 + // - Integration with the atproto identity package
+54 -10
cmd/butterfly/main.go
··· 1 1 package main 2 2 3 3 import ( 4 + "context" 5 + "flag" 4 6 "fmt" 7 + "log" 8 + "os" 5 9 6 10 "github.com/bluesky-social/indigo/cmd/butterfly/remote" 7 11 "github.com/bluesky-social/indigo/cmd/butterfly/store" 8 12 ) 9 13 10 14 func main() { 11 - r := remote.CarfileRemote{Filepath: "/Users/paulfrazee/tmp/carfiles/pfrazee.car"} 12 - s := store.StdoutStore{Mode: store.StdoutStoreModeStats} 15 + // Command line flags 16 + var ( 17 + carFile = flag.String("car", "", "Path to CAR file to read") 18 + did = flag.String("did", "", "DID to fetch (required)") 19 + outputMode = flag.String("output", "stats", "Output mode: stats or passthrough") 20 + help = flag.Bool("help", false, "Show help") 21 + ) 22 + flag.Parse() 13 23 14 - if err := s.Setup(); err != nil { 15 - fmt.Println(err) 16 - return 24 + if *help || *carFile == "" || *did == "" { 25 + fmt.Fprintf(os.Stderr, "Usage: butterfly -car <path> -did <did> [-output stats|passthrough]\n") 26 + flag.PrintDefaults() 27 + os.Exit(1) 17 28 } 18 - defer s.Close() 19 29 20 - res, err := r.FetchRepo(remote.FetchRepoParams{Did: "did:plc:ragtjsm2j2vknwkz3zp4oxrd"}) 30 + // Set up logger 31 + logger := log.New(os.Stderr, "butterfly: ", log.LstdFlags) 32 + 33 + // Create remote 34 + r := &remote.CarfileRemote{Filepath: *carFile} 35 + 36 + // Create store based on output mode 37 + var s store.Store 38 + switch *outputMode { 39 + case "passthrough": 40 + s = &store.StdoutStore{Mode: store.StdoutStoreModePassthrough} 41 + case "stats": 42 + s = &store.StdoutStore{Mode: store.StdoutStoreModeStats} 43 + default: 44 + logger.Fatalf("unknown output mode: %s", *outputMode) 45 + } 46 + 47 + // Create context 48 + ctx := context.Background() 49 + 50 + // Initialize store 51 + if err := s.Setup(ctx); err != nil { 52 + logger.Fatalf("failed to setup store: %v", err) 53 + } 54 + defer func() { 55 + if err := s.Close(); err != nil { 56 + logger.Printf("failed to close store: %v", err) 57 + } 58 + }() 59 + 60 + // Fetch repository 61 + stream, err := r.FetchRepo(ctx, remote.FetchRepoParams{Did: *did}) 21 62 if err != nil { 22 - fmt.Println(err) 23 - return 63 + logger.Fatalf("failed to fetch repo: %v", err) 24 64 } 65 + defer stream.Close() 25 66 26 - s.Receive(res) 67 + // Process the stream 68 + if err := s.Receive(ctx, stream); err != nil { 69 + logger.Fatalf("failed to process stream: %v", err) 70 + } 27 71 }
+99 -55
cmd/butterfly/remote/carfile.go
··· 1 - /* 2 - .car file remote interface 3 - */ 1 + // Package remote provides a CAR file implementation of the Remote interface 4 2 package remote 5 3 6 4 import ( 7 5 "context" 8 6 "fmt" 9 7 "os" 8 + "time" 10 9 11 10 "github.com/bluesky-social/indigo/atproto/data" 12 11 "github.com/bluesky-social/indigo/atproto/repo" ··· 14 13 "github.com/ipfs/go-cid" 15 14 ) 16 15 16 + // CarfileRemote implements the Remote interface for reading from CAR files 17 17 type CarfileRemote struct { 18 18 Filepath string 19 19 } 20 20 21 - func (self CarfileRemote) ListRepos(params ListReposParams) (*ListReposResult, error) { 22 - ctx := context.Background() 23 - 24 - _, _, did, err := ReadCar(ctx, self.Filepath) 21 + // ListRepos returns the DID of the repository in the CAR file 22 + func (c *CarfileRemote) ListRepos(ctx context.Context, params ListReposParams) (*ListReposResult, error) { 23 + _, _, did, err := c.readCar(ctx) 25 24 if err != nil { 26 - return nil, err 25 + return nil, fmt.Errorf("failed to read CAR file: %w", err) 27 26 } 28 27 29 - res := ListReposResult{ 28 + return &ListReposResult{ 30 29 Dids: []string{did}, 31 - } 32 - return &res, nil 30 + }, nil 33 31 } 34 32 35 - func (self CarfileRemote) FetchRepo(params FetchRepoParams) (*RemoteStream, error) { 36 - ctx := context.Background() 37 - 38 - // read & validate 39 - _, r, did, err := ReadCar(ctx, self.Filepath) 33 + // FetchRepo streams the contents of a repository from the CAR file 34 + func (c *CarfileRemote) FetchRepo(ctx context.Context, params FetchRepoParams) (*RemoteStream, error) { 35 + commit, r, did, err := c.readCar(ctx) 40 36 if err != nil { 41 - return nil, err 37 + return nil, fmt.Errorf("failed to read CAR file: %w", err) 42 38 } 39 + 43 40 if did != params.Did { 44 - return nil, fmt.Errorf("Repo not found: %s", params.Did) 41 + return nil, fmt.Errorf("%w: %s", ErrRepoNotFound, params.Did) 42 + } 43 + 44 + // Create stream with cancellable context 45 + streamCtx, cancel := context.WithCancel(ctx) 46 + stream := &RemoteStream{ 47 + Ch: make(chan StreamEvent, 100), // Buffer for better performance 48 + cancel: cancel, 45 49 } 46 50 47 - res := RemoteStream{Ch: make(chan StreamEvent)} 51 + // Stream repository contents 52 + go func() { 53 + defer close(stream.Ch) 54 + defer cancel() 55 + 56 + // Send records from the repository 57 + err := r.MST.Walk(func(k []byte, v cid.Cid) error { 58 + // Check for cancellation 59 + select { 60 + case <-streamCtx.Done(): 61 + return streamCtx.Err() 62 + default: 63 + } 48 64 49 - // walk & emit 50 - go (func() { 51 - err = r.MST.Walk(func(k []byte, v cid.Cid) error { 52 65 col, rkey, err := syntax.ParseRepoPath(string(k)) 53 66 if err != nil { 54 - return err 67 + return fmt.Errorf("invalid repo path %q: %w", string(k), err) 55 68 } 56 - recBytes, _, err := r.GetRecordBytes(ctx, col, rkey) 69 + 70 + // Skip if collections filter is specified 71 + if len(params.Collections) > 0 { 72 + found := false 73 + for _, c := range params.Collections { 74 + if c == col.String() { 75 + found = true 76 + break 77 + } 78 + } 79 + if !found { 80 + return nil 81 + } 82 + } 83 + 84 + recBytes, _, err := r.GetRecordBytes(streamCtx, col, rkey) 57 85 if err != nil { 58 - return err 86 + return fmt.Errorf("failed to get record %s/%s: %w", col, rkey, err) 59 87 } 60 88 61 89 rec, err := data.UnmarshalCBOR(recBytes) 62 90 if err != nil { 63 - return err 91 + return fmt.Errorf("failed to unmarshal record %s/%s: %w", col, rkey, err) 64 92 } 65 93 66 - res.Ch <- StreamEvent{ 67 - Did: did, 68 - Time: 0, // TODO 69 - Kind: "commit", 70 - Commit: StreamEventCommit{ 71 - Rev: "", // TODO 72 - Operation: "create", 94 + event := StreamEvent{ 95 + Did: did, 96 + Timestamp: time.Now(), // TODO - CAR files don't have timestamps? 97 + Kind: EventKindCommit, 98 + Commit: &StreamEventCommit{ 99 + Rev: commit.Rev, // TODO - is this accurate? 100 + Operation: OpCreate, 73 101 Collection: col.String(), 74 102 Rkey: rkey.String(), 75 103 Record: rec, 76 - Cid: "", // TODO 104 + Cid: v.String(), 77 105 }, 78 106 } 107 + 108 + select { 109 + case stream.Ch <- event: 110 + case <-streamCtx.Done(): 111 + return streamCtx.Err() 112 + } 113 + 79 114 return nil 80 115 }) 81 116 82 117 if err != nil { 83 - res.Ch <- StreamEvent{ 84 - Did: did, 85 - Time: 0, // TODO 86 - Kind: "Error", 87 - Error: StreamEventError{Err: err}, 118 + // Send error event 119 + select { 120 + case stream.Ch <- StreamEvent{ 121 + Did: did, 122 + Timestamp: time.Now(), 123 + Kind: EventKindError, 124 + Error: &StreamEventError{ 125 + Err: err, 126 + Fatal: true, 127 + }, 128 + }: 129 + case <-streamCtx.Done(): 88 130 } 89 131 } 90 - 91 - close(res.Ch) 92 - })() 132 + }() 93 133 94 - return &res, nil 134 + return stream, nil 95 135 } 96 136 97 - func (self CarfileRemote) SubscribeRecords(params SubscribeRecordsParams) (*RemoteStream, error) { 98 - res := RemoteStream{Ch: make(chan StreamEvent)} 99 - close(res.Ch) 100 - return &res, nil 137 + // SubscribeRecords is not supported for CAR files 138 + func (c *CarfileRemote) SubscribeRecords(ctx context.Context, params SubscribeRecordsParams) (*RemoteStream, error) { 139 + return nil, fmt.Errorf("subscribe records: %w", ErrNotImplemented) 101 140 } 102 141 103 - func ReadCar(ctx context.Context, path string) (*repo.Commit, *repo.Repo, string, error) { 104 - file, err := os.Open(path) 142 + // readCar reads and validates a CAR file 143 + func (c *CarfileRemote) readCar(ctx context.Context) (*repo.Commit, *repo.Repo, string, error) { 144 + file, err := os.Open(c.Filepath) 105 145 if err != nil { 106 - return nil, nil, "", err 146 + return nil, nil, "", fmt.Errorf("failed to open file: %w", err) 107 147 } 108 - c, r, err := repo.LoadRepoFromCAR(ctx, file) 148 + defer file.Close() 149 + 150 + commit, r, err := repo.LoadRepoFromCAR(ctx, file) 109 151 if err != nil { 110 - return nil, nil, "", err 152 + return nil, nil, "", fmt.Errorf("failed to load repo from CAR: %w", err) 111 153 } 112 - did, err := syntax.ParseDID(c.DID) 154 + 155 + did, err := syntax.ParseDID(commit.DID) 113 156 if err != nil { 114 - return nil, nil, "", err 157 + return nil, nil, "", fmt.Errorf("invalid DID in commit: %w", err) 115 158 } 116 - return c, r, did.String(), nil 159 + 160 + return commit, r, did.String(), nil 117 161 }
+91 -21
cmd/butterfly/remote/remote.go
··· 1 + // Package remote defines interfaces for fetching AT Protocol data from various sources 1 2 package remote 2 3 4 + import ( 5 + "context" 6 + "errors" 7 + "time" 8 + ) 9 + 10 + // Remote defines the interface for data sources in the butterfly sync engine 3 11 type Remote interface { 4 - // Lists repositories hosted at the given remote. Not all parameters will be supported. 5 - ListRepos(params ListReposParams) (ListReposResult, error) 12 + // ListRepos lists repositories hosted at the given remote 13 + // Not all remotes will support all parameters 14 + ListRepos(ctx context.Context, params ListReposParams) (*ListReposResult, error) 6 15 7 - // Fetches the contents of the requested repositories. Not all parameters will be supported. 8 - FetchRepo(params FetchRepoParams) (RemoteStream, error) 16 + // FetchRepo fetches the contents of the requested repository 17 + // Not all remotes will support all parameters 18 + FetchRepo(ctx context.Context, params FetchRepoParams) (*RemoteStream, error) 9 19 10 - // Subscribes to the record event-stream of the remote. Not all parameters will be supported. 11 - SubscribeRecords(params SubscribeRecordsParams) (RemoteStream, error) 20 + // SubscribeRecords subscribes to the record event-stream of the remote 21 + // Not all remotes will support all parameters 22 + SubscribeRecords(ctx context.Context, params SubscribeRecordsParams) (*RemoteStream, error) 12 23 } 13 24 25 + // ListReposParams contains parameters for listing repositories 14 26 type ListReposParams struct { 15 27 Collection string 28 + Cursor string 29 + Limit int 16 30 } 31 + 32 + // ListReposResult contains the result of a repository listing 17 33 type ListReposResult struct { 18 - Dids []string 34 + Dids []string 35 + Cursor string // For pagination 19 36 } 20 37 38 + // FetchRepoParams contains parameters for fetching a repository 21 39 type FetchRepoParams struct { 22 40 Did string 23 41 Collections []string 42 + Since *string // Optional: fetch only changes since this revision 24 43 } 25 44 45 + // SubscribeRecordsParams contains parameters for subscribing to records 26 46 type SubscribeRecordsParams struct { 27 47 Dids []string 28 48 Collections []string 49 + Cursor int64 // Resume from this cursor position 29 50 } 30 51 52 + // RemoteStream represents a stream of events from a remote 31 53 type RemoteStream struct { 32 - Ch chan StreamEvent 54 + Ch chan StreamEvent 55 + cancel context.CancelFunc 33 56 } 34 57 58 + // Close closes the stream 59 + func (s *RemoteStream) Close() error { 60 + if s.cancel != nil { 61 + s.cancel() 62 + } 63 + return nil 64 + } 65 + 66 + // StreamEventKind represents the type of stream event 67 + type StreamEventKind string 68 + 69 + const ( 70 + EventKindCommit StreamEventKind = "commit" 71 + EventKindIdentity StreamEventKind = "identity" 72 + EventKindAccount StreamEventKind = "account" 73 + EventKindError StreamEventKind = "error" 74 + ) 75 + 76 + // StreamEvent represents an event from the remote stream 35 77 type StreamEvent struct { 36 - Did string 37 - Time uint 38 - Kind string 39 - Commit StreamEventCommit 40 - Identity StreamEventIdentity 41 - Account StreamEventAccount 42 - Error StreamEventError 78 + Did string 79 + Timestamp time.Time 80 + Kind StreamEventKind 81 + 82 + // Event-specific data (only one will be populated based on Kind) 83 + Commit *StreamEventCommit 84 + Identity *StreamEventIdentity 85 + Account *StreamEventAccount 86 + Error *StreamEventError 43 87 } 44 88 89 + // CommitOperation represents the type of commit operation 90 + type CommitOperation string 91 + 92 + const ( 93 + OpCreate CommitOperation = "create" 94 + OpUpdate CommitOperation = "update" 95 + OpDelete CommitOperation = "delete" 96 + ) 97 + 98 + // StreamEventCommit represents a repository commit event 45 99 type StreamEventCommit struct { 46 100 Rev string 47 - Operation string 101 + Operation CommitOperation 48 102 Collection string 49 103 Rkey string 50 104 Record map[string]any 51 105 Cid string 52 106 } 107 + 108 + // StreamEventIdentity represents an identity update event 53 109 type StreamEventIdentity struct { 54 110 Did string 55 111 Handle string 56 - Seq uint 57 - Time string 112 + Seq uint64 113 + Time time.Time 58 114 } 115 + 116 + // StreamEventAccount represents an account status change event 59 117 type StreamEventAccount struct { 60 118 Active bool 61 119 Did string 62 - Seq uint 63 - Time string 120 + Seq uint64 121 + Time time.Time 64 122 } 123 + 124 + // StreamEventError represents an error event in the stream 65 125 type StreamEventError struct { 66 - Err error 126 + Err error 127 + Fatal bool // Whether this error terminates the stream 128 + RetryAfter *time.Duration // Suggested retry delay 67 129 } 130 + 131 + // Common errors 132 + var ( 133 + ErrRemoteUnavailable = errors.New("remote service unavailable") 134 + ErrNotImplemented = errors.New("operation not implemented by this remote") 135 + ErrInvalidDID = errors.New("invalid DID format") 136 + ErrRepoNotFound = errors.New("repository not found") 137 + )
+107 -38
cmd/butterfly/selectors.go
··· 1 - /* 2 - Query patterns for selecting content to sync and retain 3 - */ 4 - 1 + // Package main provides query patterns for selecting content to sync and retain 5 2 package main 6 3 7 - import "fmt" 4 + import ( 5 + "encoding/json" 6 + "fmt" 7 + ) 8 8 9 + // SelectorDoc represents a complete selector configuration with selection rules and retention policies 9 10 type SelectorDoc struct { 10 11 Selectors []Selector `json:"select"` 11 12 Retainers Retainer `json:"retain"` 12 13 } 13 14 15 + // Selector defines a rule for selecting content based on a where clause and assigns it a tag 14 16 type Selector struct { 15 17 Where WhereClause `json:"where"` 16 18 Tag string `json:"tag"` 17 19 } 18 20 21 + // WhereClause specifies the criteria for selecting content 19 22 type WhereClause struct { 20 - Repo string `json:"repo"` 21 - Collection string `json:"collection"` 22 - Attr string `json:"attr"` 23 - Service string `json:"service"` 24 - Method string `json:"method"` 25 - Params map[string]string `json:"params"` 26 - Pagination map[string]string `json:"pagination"` 23 + // Repo selection fields 24 + Repo string `json:"repo,omitempty"` 25 + Collection string `json:"collection,omitempty"` 26 + Attr string `json:"attr,omitempty"` 27 + 28 + // Service selection fields 29 + Service string `json:"service,omitempty"` 30 + Method string `json:"method,omitempty"` 31 + Params map[string]string `json:"params,omitempty"` 32 + Pagination map[string]string `json:"pagination,omitempty"` 27 33 } 28 34 35 + // Retainer maps tags to their retention policies 36 + // Format: tag -> collection pattern -> retention policy 29 37 type Retainer map[string]map[string]string 30 38 31 - // SelectorDoc 39 + // String returns a string representation of the SelectorDoc 40 + func (s SelectorDoc) String() string { 41 + return fmt.Sprintf("selectors=%v retain=%v", s.Selectors, s.Retainers) 42 + } 43 + 44 + // Validate checks if the SelectorDoc is valid 45 + func (s SelectorDoc) Validate() error { 46 + if len(s.Selectors) == 0 { 47 + return fmt.Errorf("no selectors defined") 48 + } 32 49 33 - func (self SelectorDoc) String() string { 34 - return fmt.Sprintf("%s retain=%s", self.Selectors, self.Retainers) 50 + tags := make(map[string]bool) 51 + for i, sel := range s.Selectors { 52 + if err := sel.Validate(); err != nil { 53 + return fmt.Errorf("selector[%d]: %w", i, err) 54 + } 55 + if tags[sel.Tag] { 56 + return fmt.Errorf("duplicate tag: %s", sel.Tag) 57 + } 58 + tags[sel.Tag] = true 59 + } 60 + 61 + // Validate that all retainer tags exist in selectors 62 + for tag := range s.Retainers { 63 + if !tags[tag] { 64 + return fmt.Errorf("retainer references unknown tag: %s", tag) 65 + } 66 + } 67 + 68 + return nil 35 69 } 36 70 37 - // Selector 71 + // IsRepo returns true if this selector targets a repository 72 + func (s Selector) IsRepo() bool { 73 + return s.Where.Repo != "" && s.Where.Collection == "" && s.Where.Attr == "" 74 + } 38 75 39 - func (self Selector) IsRepo() bool { 40 - return self.Where.Repo != "" 76 + // IsRepoRecord returns true if this selector targets specific records in a repository 77 + func (s Selector) IsRepoRecord() bool { 78 + return s.Where.Repo != "" && s.Where.Collection != "" && s.Where.Attr != "" 41 79 } 42 80 43 - func (self Selector) IsRepoRecord() bool { 44 - return self.Where.Repo != "" && self.Where.Collection != "" && self.Where.Attr != "" 81 + // IsService returns true if this selector targets a service endpoint 82 + func (s Selector) IsService() bool { 83 + return s.Where.Service != "" && s.Where.Method != "" && s.Where.Attr != "" 45 84 } 46 85 47 - func (self Selector) IsService() bool { 48 - return self.Where.Service != "" && self.Where.Method != "" && self.Where.Attr != "" 86 + // Type returns the type of selector as a string 87 + func (s Selector) Type() string { 88 + switch { 89 + case s.IsRepo(): 90 + return "repo" 91 + case s.IsRepoRecord(): 92 + return "repo_record" 93 + case s.IsService(): 94 + return "service" 95 + default: 96 + return "invalid" 97 + } 49 98 } 50 99 51 - func (self Selector) IsValid() bool { 52 - return self.IsRepo() || self.IsRepoRecord() || self.IsService() 53 - } 100 + // Validate checks if the selector is valid 101 + func (s Selector) Validate() error { 102 + if s.Tag == "" { 103 + return fmt.Errorf("missing tag") 104 + } 54 105 55 - func (self Selector) String() string { 56 - if self.Tag == "" { 57 - return "(Invalid selector)" 106 + if !s.IsRepo() && !s.IsRepoRecord() && !s.IsService() { 107 + return fmt.Errorf("invalid where clause: must specify either repo, repo+collection+attr, or service+method+attr") 58 108 } 59 - return fmt.Sprintf("%s,tag=%s", self.Where, self.Tag) 109 + 110 + return nil 60 111 } 61 112 62 - // WhereClause 113 + // String returns a string representation of the Selector 114 + func (s Selector) String() string { 115 + return fmt.Sprintf("tag=%s,%s", s.Tag, s.Where) 116 + } 63 117 64 - func (self WhereClause) String() string { 65 - if self.Repo != "" && self.Collection != "" && self.Attr != "" { 66 - return fmt.Sprintf("where=at://%s/%s/*#%s", self.Repo, self.Collection, self.Attr) 118 + // String returns a string representation of the WhereClause 119 + func (w WhereClause) String() string { 120 + switch { 121 + case w.Repo != "" && w.Collection != "" && w.Attr != "": 122 + return fmt.Sprintf("where=at://%s/%s/*#%s", w.Repo, w.Collection, w.Attr) 123 + case w.Repo != "": 124 + return fmt.Sprintf("where=at://%s", w.Repo) 125 + case w.Service != "" && w.Method != "" && w.Attr != "": 126 + return fmt.Sprintf("where=https://%s/_xrpc/%s/*#%s", w.Service, w.Method, w.Attr) 127 + default: 128 + return "where=(invalid)" 67 129 } 68 - if self.Repo != "" { 69 - return fmt.Sprintf("where=at://%s", self.Repo) 130 + } 131 + 132 + // ParseSelectorDoc parses a JSON selector document 133 + func ParseSelectorDoc(data []byte) (*SelectorDoc, error) { 134 + var doc SelectorDoc 135 + if err := json.Unmarshal(data, &doc); err != nil { 136 + return nil, fmt.Errorf("failed to parse selector doc: %w", err) 70 137 } 71 - if self.Service != "" && self.Method != "" && self.Attr != "" { 72 - return fmt.Sprintf("where=https://%s/_xrpc/%s/*#%s", self.Service, self.Method, self.Attr) 138 + 139 + if err := doc.Validate(); err != nil { 140 + return nil, fmt.Errorf("invalid selector doc: %w", err) 73 141 } 74 - return "where=(Invalid clause)" 142 + 143 + return &doc, nil 75 144 }
+71 -24
cmd/butterfly/store/stdout.go
··· 1 - /* 2 - Dump-to-stdout storage interface 3 - */ 4 - 1 + // Package store provides a stdout implementation of the Store interface 5 2 package store 6 3 7 4 import ( 5 + "context" 8 6 "fmt" 9 7 10 8 "github.com/bluesky-social/indigo/cmd/butterfly/remote" 11 9 ) 12 10 11 + // Output modes for StdoutStore 13 12 const ( 14 13 StdoutStoreModePassthrough = iota 15 14 StdoutStoreModeStats 16 15 ) 17 16 17 + // StdoutStore implements Store by writing to stdout 18 18 type StdoutStore struct { 19 19 Mode int 20 20 21 - // stats 22 - // TODO: should support multiple repos 23 - Did string 24 - NumRecords uint 21 + // Stats tracking 22 + stats map[string]*repoStats 25 23 } 26 24 27 - func (self *StdoutStore) Setup() error { 25 + type repoStats struct { 26 + numRecords int 27 + numCommits int 28 + numErrors int 29 + collections map[string]int 30 + } 31 + 32 + // Setup initializes the store 33 + func (s *StdoutStore) Setup(ctx context.Context) error { 34 + if s.Mode == StdoutStoreModeStats { 35 + s.stats = make(map[string]*repoStats) 36 + } 28 37 return nil 29 38 } 30 39 31 - func (self *StdoutStore) Close() error { 40 + // Close outputs final statistics if in stats mode 41 + func (s *StdoutStore) Close() error { 42 + if s.Mode == StdoutStoreModeStats && len(s.stats) > 0 { 43 + s.printStats() 44 + } 32 45 return nil 33 46 } 34 47 35 - func (self *StdoutStore) Receive(s *remote.RemoteStream) error { 36 - for event := range s.Ch { 37 - if self.Did == "" { 38 - self.Did = event.Did 48 + // Receive processes events from the stream 49 + func (s *StdoutStore) Receive(ctx context.Context, stream *remote.RemoteStream) error { 50 + for event := range stream.Ch { 51 + select { 52 + case <-ctx.Done(): 53 + return ctx.Err() 54 + default: 39 55 } 40 56 41 - switch self.Mode { 57 + switch s.Mode { 42 58 case StdoutStoreModePassthrough: 43 - fmt.Println(event) 59 + fmt.Printf("%+v\n", event) 44 60 case StdoutStoreModeStats: 45 - if event.Kind == "commit" && event.Commit.Operation == "create" { 46 - self.NumRecords++ 47 - } 61 + s.updateStats(event) 48 62 } 49 63 } 64 + return nil 65 + } 50 66 51 - if self.Mode == StdoutStoreModeStats { 52 - // TODO make this more interesting 53 - fmt.Printf("Stats for repo %s\n", self.Did) 54 - fmt.Printf("%d records", self.NumRecords) 67 + func (s *StdoutStore) updateStats(event remote.StreamEvent) { 68 + stats, exists := s.stats[event.Did] 69 + if !exists { 70 + stats = &repoStats{ 71 + collections: make(map[string]int), 72 + } 73 + s.stats[event.Did] = stats 74 + } 75 + 76 + switch event.Kind { 77 + case remote.EventKindCommit: 78 + stats.numCommits++ 79 + if event.Commit != nil { 80 + stats.numRecords++ 81 + stats.collections[event.Commit.Collection]++ 82 + } 83 + case remote.EventKindError: 84 + stats.numErrors++ 55 85 } 86 + } 56 87 57 - return nil 88 + func (s *StdoutStore) printStats() { 89 + fmt.Println("\n=== Repository Statistics ===") 90 + for did, stats := range s.stats { 91 + fmt.Printf("\nRepo: %s\n", did) 92 + fmt.Printf(" Records: %d\n", stats.numRecords) 93 + fmt.Printf(" Commits: %d\n", stats.numCommits) 94 + if stats.numErrors > 0 { 95 + fmt.Printf(" Errors: %d\n", stats.numErrors) 96 + } 97 + 98 + if len(stats.collections) > 0 { 99 + fmt.Println(" Collections:") 100 + for col, count := range stats.collections { 101 + fmt.Printf(" %s: %d\n", col, count) 102 + } 103 + } 104 + } 58 105 }
+23 -6
cmd/butterfly/store/store.go
··· 1 + // Package store defines interfaces for persisting AT Protocol data 1 2 package store 2 3 3 - import "github.com/bluesky-social/indigo/cmd/butterfly/remote" 4 + import ( 5 + "context" 4 6 7 + "github.com/bluesky-social/indigo/cmd/butterfly/remote" 8 + ) 9 + 10 + // Store defines the interface for data persistence in the butterfly sync engine 5 11 type Store interface { 6 - // Initialize the store 7 - Setup() error 12 + // Setup initializes the store 13 + Setup(ctx context.Context) error 8 14 9 - // Teardown the store 15 + // Close tears down the store and releases resources 10 16 Close() error 11 17 12 - // Subscribe to a record emitter 13 - Receive(s *remote.RemoteStream) error 18 + // Receive processes events from a remote stream 19 + // The implementation should handle context cancellation appropriately 20 + Receive(ctx context.Context, stream *remote.RemoteStream) error 14 21 } 22 + 23 + // StoreType identifies the type of store 24 + type StoreType string 25 + 26 + const ( 27 + StoreTypeStdout StoreType = "stdout" 28 + StoreTypeDuckDB StoreType = "duckdb" 29 + StoreTypeClickHouse StoreType = "clickhouse" 30 + StoreTypeTarFiles StoreType = "tarfiles" 31 + )
+4
cmd/butterfly/store/tarfiles.go
··· 11 11 ) 12 12 13 13 type TarfilesStore struct { 14 + // The directory to store the .tar files 15 + // Each repository is stored as a single .tar file 16 + // The contents of the .tar file is a collection of json files 17 + // The directory structure is based on the cllections 14 18 dirpath string 15 19 } 16 20