this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

some more cleanup

+71 -225
+35 -110
archiver/archiver.go
··· 19 19 "github.com/bluesky-social/indigo/models" 20 20 "github.com/bluesky-social/indigo/repomgr" 21 21 "github.com/bluesky-social/indigo/util/svcutil" 22 - "github.com/bluesky-social/indigo/xrpc" 23 22 lru "github.com/hashicorp/golang-lru/v2" 24 23 ipld "github.com/ipfs/go-ipld-format" 25 24 "github.com/labstack/echo/v4" ··· 34 33 var tracer = otel.Tracer("archiver") 35 34 36 35 type Archiver struct { 37 - Index *Indexer 38 36 db *gorm.DB 39 37 slurper *bgs.Slurper 40 38 didr did.Resolver 39 + 40 + crawler *CrawlDispatcher 41 41 42 42 // TODO: work on doing away with this flag in favor of more pluggable 43 43 // pieces that abstract the need for explicit ssl checks ··· 60 60 pdsResyncsLk sync.RWMutex 61 61 pdsResyncs map[uint]*bgs.PDSResync 62 62 63 - // Management of Compaction 64 - compactor *bgs.Compactor 65 - 66 63 // User cache 67 64 userCache *lru.Cache[string, *User] 68 65 69 - // nextCrawlers gets forwarded POST /xrpc/com.atproto.sync.requestCrawl 70 - nextCrawlers []*url.URL 71 - httpClient http.Client 66 + httpClient http.Client 72 67 73 68 log *slog.Logger 74 69 } ··· 96 91 } 97 92 } 98 93 99 - func NewArchiver(db *gorm.DB, ix *Indexer, repoman *repomgr.RepoManager, didr did.Resolver, rf *RepoFetcher, config *ArchiverConfig) (*Archiver, error) { 94 + func NewArchiver(db *gorm.DB, repoman *repomgr.RepoManager, didr did.Resolver, rf *RepoFetcher, config *ArchiverConfig) (*Archiver, error) { 100 95 if config == nil { 101 96 config = DefaultArchiverConfig() 102 97 } ··· 107 102 108 103 uc, _ := lru.New[string, *User](1_000_000) 109 104 105 + log := slog.Default().With("system", "archiver") 106 + 107 + c, err := NewCrawlDispatcher(rf, rf.MaxConcurrency, log) 108 + if err != nil { 109 + return nil, err 110 + } 111 + 112 + c.Run() 113 + 110 114 arc := &Archiver{ 111 - Index: ix, 112 - db: db, 115 + crawler: c, 116 + db: db, 113 117 114 118 repoman: repoman, 115 119 didr: didr, ··· 122 126 123 127 userCache: uc, 124 128 125 - log: slog.Default().With("system", "archiver"), 129 + log: log, 126 130 } 127 131 128 - ix.CreateExternalUser = arc.handleUserUpdate 129 132 slOpts := bgs.DefaultSlurperOptions() 130 133 slOpts.SSL = config.SSL 131 134 slOpts.DefaultRepoLimit = config.DefaultRepoLimit ··· 142 145 return nil, err 143 146 } 144 147 145 - cOpts := bgs.DefaultCompactorOptions() 146 - cOpts.NumWorkers = config.NumCompactionWorkers 147 - compactor := bgs.NewCompactor(cOpts) 148 - compactor.RequeueInterval = config.CompactInterval 149 - // TODO: compactor shenanigans 150 - //compactor.Start(arc) 151 - arc.compactor = compactor 152 - 153 - arc.nextCrawlers = config.NextCrawlers 154 148 arc.httpClient.Timeout = time.Second * 5 155 149 156 150 return arc, nil ··· 163 157 Did string `gorm:"uniqueindex"` 164 158 PDS uint 165 159 166 - // TakenDown is set to true if the user in question has been taken down. 167 - // A user in this state will have all future events related to it dropped 168 - // and no data about this user will be served. 169 - TakenDown bool 170 - Tombstoned bool 171 - 172 160 // UpstreamStatus is the state of the user as reported by the upstream PDS 173 161 UpstreamStatus string `gorm:"index"` 174 162 175 163 lk sync.Mutex 176 164 } 177 165 178 - func (u *User) SetTakenDown(v bool) { 179 - u.lk.Lock() 180 - defer u.lk.Unlock() 181 - u.TakenDown = v 182 - } 183 - 184 - func (u *User) GetTakenDown() bool { 185 - u.lk.Lock() 186 - defer u.lk.Unlock() 187 - return u.TakenDown 188 - } 189 - 190 - func (u *User) SetTombstoned(v bool) { 191 - u.lk.Lock() 192 - defer u.lk.Unlock() 193 - u.Tombstoned = v 194 - } 195 - 196 - func (u *User) GetTombstoned() bool { 197 - u.lk.Lock() 198 - defer u.lk.Unlock() 199 - return u.Tombstoned 200 - } 201 - 202 166 func (u *User) SetUpstreamStatus(v string) { 203 167 u.lk.Lock() 204 168 defer u.lk.Unlock() ··· 220 184 s.log.Debug("create external user", "did", did) 221 185 doc, err := s.didr.GetDocument(ctx, did) 222 186 if err != nil { 223 - return nil, fmt.Errorf("could not locate DID document for followed user (%s): %w", did, err) 187 + return nil, fmt.Errorf("could not locate DID document for user (%s): %w", did, err) 224 188 } 225 189 226 190 if len(doc.Service) == 0 { 227 - return nil, fmt.Errorf("external followed user %s had no services in did document", did) 191 + return nil, fmt.Errorf("user %s had no services in did document", did) 228 192 } 229 193 230 194 svc := doc.Service[0] ··· 244 208 return nil, err 245 209 } 246 210 247 - c := &xrpc.Client{Host: durl.String()} 248 - s.Index.ApplyPDSClientSettings(c) 249 - 250 211 if peering.ID == 0 { 251 212 peering.Host = durl.Host 252 213 peering.SSL = (durl.Scheme == "https") ··· 307 268 s.extUserLk.Lock() 308 269 defer s.extUserLk.Unlock() 309 270 310 - exu, err := s.Index.LookupUserByDid(ctx, did) 271 + exu, err := s.LookupUserByDid(ctx, did) 311 272 if err == nil { 312 273 s.log.Debug("lost the race to create a new user", "did", did) 313 274 if exu.PDS != peering.ID { ··· 361 322 u, err := s.handleUserUpdate(ctx, evt.Repo) 362 323 userLookupDuration.Observe(time.Since(st).Seconds()) 363 324 if err != nil { 364 - if !errors.Is(err, gorm.ErrRecordNotFound) { 365 - repoCommitsResultCounter.WithLabelValues(host.Host, "nou").Inc() 366 - return fmt.Errorf("looking up event user: %w", err) 367 - } 368 - 369 - newUsersDiscovered.Inc() 370 - start := time.Now() 371 - subj, err := s.handleUserUpdate(ctx, evt.Repo) 372 - newUserDiscoveryDuration.Observe(time.Since(start).Seconds()) 373 - if err != nil { 374 - repoCommitsResultCounter.WithLabelValues(host.Host, "uerr").Inc() 375 - return fmt.Errorf("fed event create external user: %w", err) 376 - } 377 - 378 - u = subj 325 + return fmt.Errorf("looking up event user: %w", err) 379 326 } 380 327 381 328 ustatus := u.GetUpstreamStatus() 382 329 span.SetAttributes(attribute.String("upstream_status", ustatus)) 383 330 384 - if u.GetTakenDown() || ustatus == events.AccountStatusTakendown { 385 - span.SetAttributes(attribute.Bool("taken_down_by_relay_admin", u.GetTakenDown())) 331 + switch ustatus { 332 + case events.AccountStatusTakendown: 386 333 s.log.Debug("dropping commit event from taken down user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host) 387 334 repoCommitsResultCounter.WithLabelValues(host.Host, "tdu").Inc() 388 335 return nil 389 - } 390 - 391 - if ustatus == events.AccountStatusSuspended { 336 + case events.AccountStatusSuspended: 392 337 s.log.Debug("dropping commit event from suspended user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host) 393 338 repoCommitsResultCounter.WithLabelValues(host.Host, "susu").Inc() 394 339 return nil 395 - } 396 - 397 - if ustatus == events.AccountStatusDeactivated { 340 + case events.AccountStatusDeactivated: 398 341 s.log.Debug("dropping commit event from deactivated user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host) 399 342 repoCommitsResultCounter.WithLabelValues(host.Host, "du").Inc() 400 343 return nil ··· 405 348 return fmt.Errorf("rebase was true in event seq:%d,host:%s", evt.Seq, host.Host) 406 349 } 407 350 408 - if host.ID != u.PDS && u.PDS != 0 { 351 + if host.ID != u.PDS && u.PDS != 0 && !host.Trusted { 409 352 s.log.Warn("received event for repo from different pds than expected", "repo", evt.Repo, "expPds", u.PDS, "gotPds", host.Host) 410 353 // Flush any cached DID documents for this user 411 354 s.didr.FlushCacheFor(env.RepoCommit.Repo) ··· 422 365 } 423 366 } 424 367 425 - if u.GetTombstoned() { 426 - span.SetAttributes(attribute.Bool("tombstoned", true)) 427 - // we've checked the authority of the users PDS, so reinstate the account 428 - if err := s.db.Model(&User{}).Where("id = ?", u.ID).UpdateColumn("tombstoned", false).Error; err != nil { 429 - repoCommitsResultCounter.WithLabelValues(host.Host, "tomb").Inc() 430 - return fmt.Errorf("failed to un-tombstone a user: %w", err) 431 - } 432 - u.SetTombstoned(false) 433 - 434 - ai, err := s.Index.LookupUser(ctx, u.ID) 435 - if err != nil { 436 - repoCommitsResultCounter.WithLabelValues(host.Host, "nou2").Inc() 437 - return fmt.Errorf("failed to look up user (tombstone recover): %w", err) 438 - } 439 - 440 - // Now a simple re-crawl should suffice to bring the user back online 441 - repoCommitsResultCounter.WithLabelValues(host.Host, "catchupt").Inc() 442 - return s.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt) 443 - } 444 - 445 368 // skip the fast path for rebases or if the user is already in the slow path 446 - if s.Index.Crawler.RepoInSlowPath(ctx, u.ID) { 447 - ai, err := s.Index.LookupUser(ctx, u.ID) 369 + if s.crawler.RepoInSlowPath(ctx, u.ID) { 370 + ai, err := s.LookupUser(ctx, u.ID) 448 371 if err != nil { 449 372 repoCommitsResultCounter.WithLabelValues(host.Host, "nou3").Inc() 450 373 return fmt.Errorf("failed to look up user (slow path): %w", err) ··· 459 382 // whether or not we even need this 'slow path' logic, as it makes 460 383 // accounting for which events have been processed much harder 461 384 repoCommitsResultCounter.WithLabelValues(host.Host, "catchup").Inc() 462 - return s.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt) 385 + return s.crawler.AddToCatchupQueue(ctx, host, ai, evt) 463 386 } 464 387 465 388 if err := s.repoman.HandleExternalUserEvent(ctx, host.ID, u.ID, u.Did, evt.Since, evt.Rev, evt.Blocks, evt.Ops); err != nil { 466 389 467 390 if errors.Is(err, carstore.ErrRepoBaseMismatch) || ipld.IsNotFound(err) { 468 - ai, lerr := s.Index.LookupUser(ctx, u.ID) 391 + ai, lerr := s.LookupUser(ctx, u.ID) 469 392 if lerr != nil { 470 393 log.Warn("failed handling event, no user", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "commit", evt.Commit.String()) 471 394 repoCommitsResultCounter.WithLabelValues(host.Host, "nou4").Inc() ··· 476 399 477 400 log.Info("failed handling event, catchup", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "commit", evt.Commit.String()) 478 401 repoCommitsResultCounter.WithLabelValues(host.Host, "catchup2").Inc() 479 - return s.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt) 402 + return s.crawler.AddToCatchupQueue(ctx, host, ai, evt) 480 403 } 481 404 482 405 log.Warn("failed handling event", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "commit", evt.Commit.String()) ··· 487 410 repoCommitsResultCounter.WithLabelValues(host.Host, "ok").Inc() 488 411 return nil 489 412 case env.RepoIdentity != nil: 490 - s.log.Info("bgs got identity event", "did", env.RepoIdentity.Did) 413 + s.log.Info("archiver got identity event", "did", env.RepoIdentity.Did) 491 414 // Flush any cached DID documents for this user 492 415 s.didr.FlushCacheFor(env.RepoIdentity.Did) 493 416 ··· 509 432 span.SetAttributes(attribute.String("repo_status", *env.RepoAccount.Status)) 510 433 } 511 434 512 - s.log.Info("bgs got account event", "did", env.RepoAccount.Did) 435 + s.log.Info("archiver got account event", "did", env.RepoAccount.Did) 513 436 // Flush any cached DID documents for this user 514 437 s.didr.FlushCacheFor(env.RepoAccount.Did) 515 438 ··· 823 746 func (s *Archiver) Shutdown() []error { 824 747 errs := s.slurper.Shutdown() 825 748 826 - s.compactor.Shutdown() 749 + if s.crawler != nil { 750 + s.crawler.Shutdown() 751 + } 827 752 828 753 return errs 829 754 }
+9 -10
archiver/crawler.go
··· 199 199 c.maplk.Lock() 200 200 defer c.maplk.Unlock() 201 201 202 - // If the actor crawl is enqueued, we can append to the catchup queue which gets emptied during the crawl 202 + // If the actor crawl is enqueued, we can ignore this event as it will 203 + // be included in the repo when we fetch it 203 204 job, ok := c.todo[catchup.user.ID] 204 205 if ok { 205 206 catchupEventsEnqueued.WithLabelValues("todo").Inc() ··· 221 222 act: catchup.user, 222 223 catchup: []*catchupJob{catchup}, 223 224 } 225 + 224 226 c.todo[catchup.user.ID] = cw 225 227 return cw 226 228 } 227 229 228 230 func (c *CrawlDispatcher) fetchWorker() { 229 - for { 230 - select { 231 - case job := <-c.repoSync: 232 - if err := c.repoFetcher.FetchAndIndexRepo(context.TODO(), job); err != nil { 233 - c.log.Error("failed to perform repo crawl", "did", job.act.Did, "err", err) 234 - } 235 - 236 - // TODO: do we still just do this if it errors? 237 - c.complete <- job.act.ID 231 + for job := range c.repoSync { 232 + if err := c.repoFetcher.FetchAndIndexRepo(context.TODO(), job); err != nil { 233 + c.log.Error("failed to perform repo crawl", "did", job.act.Did, "err", err) 238 234 } 235 + 236 + // TODO: do we still just do this if it errors? 237 + c.complete <- job.act.ID 239 238 } 240 239 } 241 240
+7 -18
archiver/handlers.go
··· 36 36 return echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user") 37 37 } 38 38 39 - if u.GetTombstoned() { 40 - return fmt.Errorf("account was deleted") 41 - } 42 - 43 - if u.GetTakenDown() { 44 - return fmt.Errorf("account was taken down by the Relay") 45 - } 46 - 47 39 ustatus := u.GetUpstreamStatus() 48 - if ustatus == events.AccountStatusTakendown { 40 + switch ustatus { 41 + case events.AccountStatusTakendown: 49 42 return fmt.Errorf("account was taken down by its PDS") 50 - } 51 - 52 - if ustatus == events.AccountStatusDeactivated { 43 + case events.AccountStatusDeactivated: 53 44 return fmt.Errorf("account is temporarily deactivated") 54 - } 55 - 56 - if ustatus == events.AccountStatusSuspended { 45 + case events.AccountStatusSuspended: 57 46 return fmt.Errorf("account is suspended by its PDS") 58 47 } 59 48 ··· 74 63 if err := s.db.Exec("SELECT 1").Error; err != nil { 75 64 s.log.Error("healthcheck can't connect to database", "err", err) 76 65 return c.JSON(500, HealthStatus{Status: "error", Message: "can't connect to database"}) 77 - } else { 78 - return c.JSON(200, HealthStatus{Status: "ok"}) 79 66 } 67 + 68 + return c.JSON(200, HealthStatus{Status: "ok"}) 80 69 } 81 70 82 - var homeMessage string = ` 71 + var homeMessage = ` 83 72 [ insert fancy archiver art here ] 84 73 ` 85 74
+16 -78
archiver/loader.go
··· 4 4 "context" 5 5 "errors" 6 6 "fmt" 7 - "log/slog" 8 7 9 - "github.com/bluesky-social/indigo/did" 10 - "github.com/bluesky-social/indigo/events" 11 8 "github.com/bluesky-social/indigo/models" 12 - "github.com/bluesky-social/indigo/xrpc" 13 9 14 10 "go.opentelemetry.io/otel" 15 11 "gorm.io/gorm" 16 12 ) 17 13 18 - const MaxEventSliceLength = 1000000 19 - const MaxOpsSliceLength = 200 20 - 21 - type Indexer struct { 22 - db *gorm.DB 23 - 24 - didr did.Resolver 25 - 26 - Crawler *CrawlDispatcher 27 - 28 - CreateExternalUser func(context.Context, string) (*User, error) 29 - ApplyPDSClientSettings func(*xrpc.Client) 30 - 31 - log *slog.Logger 32 - } 33 - 34 - type AddEventFunc func(ctx context.Context, ev *events.XRPCStreamEvent) error 35 - 36 - func NewIndexer(db *gorm.DB, didr did.Resolver, fetcher *RepoFetcher, crawl bool) (*Indexer, error) { 37 - ix := &Indexer{ 38 - db: db, 39 - didr: didr, 40 - ApplyPDSClientSettings: func(*xrpc.Client) {}, 41 - log: slog.Default().With("system", "indexer"), 42 - } 43 - 44 - if crawl { 45 - c, err := NewCrawlDispatcher(fetcher, fetcher.MaxConcurrency, ix.log) 46 - if err != nil { 47 - return nil, err 48 - } 49 - 50 - ix.Crawler = c 51 - ix.Crawler.Run() 52 - } 53 - 54 - return ix, nil 55 - } 56 - 57 - func (ix *Indexer) Shutdown() { 58 - if ix.Crawler != nil { 59 - ix.Crawler.Shutdown() 60 - } 61 - } 62 - 63 - func (ix *Indexer) GetUserOrMissing(ctx context.Context, did string) (*User, error) { 14 + func (s *Archiver) GetUserOrMissing(ctx context.Context, did string) (*User, error) { 64 15 ctx, span := otel.Tracer("indexer").Start(ctx, "getUserOrMissing") 65 16 defer span.End() 66 17 67 - ai, err := ix.LookupUserByDid(ctx, did) 18 + ai, err := s.LookupUserByDid(ctx, did) 68 19 if err == nil { 69 20 return ai, nil 70 21 } ··· 74 25 } 75 26 76 27 // unknown user... create it and send it off to the crawler 77 - return ix.createMissingUserRecord(ctx, did) 28 + return s.createMissingUserRecord(ctx, did) 78 29 } 79 30 80 - func (ix *Indexer) createMissingUserRecord(ctx context.Context, did string) (*User, error) { 31 + func (s *Archiver) createMissingUserRecord(ctx context.Context, did string) (*User, error) { 81 32 ctx, span := otel.Tracer("indexer").Start(ctx, "createMissingUserRecord") 82 33 defer span.End() 83 34 84 35 externalUserCreationAttempts.Inc() 85 36 86 - ai, err := ix.CreateExternalUser(ctx, did) 37 + ai, err := s.handleUserUpdate(ctx, did) 87 38 if err != nil { 88 39 return nil, err 89 40 } 90 41 91 - if err := ix.addUserToCrawler(ctx, ai); err != nil { 42 + if err := s.addUserToCrawler(ctx, ai); err != nil { 92 43 return nil, fmt.Errorf("failed to add unknown user to crawler: %w", err) 93 44 } 94 45 95 46 return ai, nil 96 47 } 97 48 98 - func (ix *Indexer) addUserToCrawler(ctx context.Context, ai *User) error { 99 - ix.log.Debug("Sending user to crawler: ", "did", ai.Did) 100 - if ix.Crawler == nil { 49 + func (s *Archiver) addUserToCrawler(ctx context.Context, ai *User) error { 50 + s.log.Debug("Sending user to crawler: ", "did", ai.Did) 51 + if s.crawler == nil { 101 52 return nil 102 53 } 103 54 104 - return ix.Crawler.Crawl(ctx, ai) 55 + return s.crawler.Crawl(ctx, ai) 105 56 } 106 57 107 - func (ix *Indexer) DidForUser(ctx context.Context, uid models.Uid) (string, error) { 58 + func (s *Archiver) DidForUser(ctx context.Context, uid models.Uid) (string, error) { 108 59 var ai User 109 - if err := ix.db.First(&ai, "uid = ?", uid).Error; err != nil { 60 + if err := s.db.First(&ai, "id = ?", uid).Error; err != nil { 110 61 return "", err 111 62 } 112 63 113 64 return ai.Did, nil 114 65 } 115 66 116 - func (ix *Indexer) LookupUser(ctx context.Context, id models.Uid) (*User, error) { 67 + func (s *Archiver) LookupUser(ctx context.Context, id models.Uid) (*User, error) { 117 68 var ai User 118 - if err := ix.db.First(&ai, "uid = ?", id).Error; err != nil { 69 + if err := s.db.First(&ai, "id = ?", id).Error; err != nil { 119 70 return nil, err 120 71 } 121 72 122 73 return &ai, nil 123 74 } 124 75 125 - func (ix *Indexer) LookupUserByDid(ctx context.Context, did string) (*User, error) { 76 + func (s *Archiver) LookupUserByDid(ctx context.Context, did string) (*User, error) { 126 77 var ai User 127 - if err := ix.db.Find(&ai, "did = ?", did).Error; err != nil { 128 - return nil, err 129 - } 130 - 131 - if ai.ID == 0 { 132 - return nil, gorm.ErrRecordNotFound 133 - } 134 - 135 - return &ai, nil 136 - } 137 - 138 - func (ix *Indexer) LookupUserByHandle(ctx context.Context, handle string) (*User, error) { 139 - var ai User 140 - if err := ix.db.Find(&ai, "handle = ?", handle).Error; err != nil { 78 + if err := s.db.Find(&ai, "did = ?", did).Error; err != nil { 141 79 return nil, err 142 80 } 143 81
+2 -9
cmd/archit/main.go
··· 327 327 328 328 rf := archiver.NewRepoFetcher(db, repoman, cctx.Int("max-fetch-concurrency")) 329 329 330 - ix, err := archiver.NewIndexer(db, cachedidr, rf, true) 331 - if err != nil { 332 - return err 333 - } 334 - defer ix.Shutdown() 335 - 336 330 rlskip := cctx.String("bsky-social-rate-limit-skip") 337 - ix.ApplyPDSClientSettings = func(c *xrpc.Client) { 331 + rf.ApplyPDSClientSettings = func(c *xrpc.Client) { 338 332 if c.Client == nil { 339 333 c.Client = util.RobustHTTPClient() 340 334 } ··· 350 344 c.Client.Timeout = time.Minute * 1 351 345 } 352 346 } 353 - rf.ApplyPDSClientSettings = ix.ApplyPDSClientSettings 354 347 355 348 slog.Info("constructing archiver") 356 349 archiverConfig := archiver.DefaultArchiverConfig() ··· 374 367 archiverConfig.NextCrawlers = nextCrawlerUrls 375 368 } 376 369 377 - arc, err := archiver.NewArchiver(db, ix, repoman, cachedidr, rf, archiverConfig) 370 + arc, err := archiver.NewArchiver(db, repoman, cachedidr, rf, archiverConfig) 378 371 if err != nil { 379 372 return err 380 373 }
+2
models/models.go
··· 111 111 Registered bool 112 112 Blocked bool 113 113 114 + Trusted bool 115 + 114 116 RateLimit float64 115 117 CrawlRateLimit float64 116 118