first pass at yoinking bgs code and hacking it until it looks like an archival service

+4687 -21
archiver/archiver.go (+921)
```go
package archiver

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"log/slog"
	"net"
	"net/http"
	"net/url"
	"strings"
	"sync"
	"time"

	"github.com/bluesky-social/indigo/api/atproto"
	"github.com/bluesky-social/indigo/bgs"
	"github.com/bluesky-social/indigo/carstore"
	"github.com/bluesky-social/indigo/did"
	"github.com/bluesky-social/indigo/events"
	"github.com/bluesky-social/indigo/handles"
	"github.com/bluesky-social/indigo/models"
	"github.com/bluesky-social/indigo/repomgr"
	"github.com/bluesky-social/indigo/util/svcutil"
	"github.com/bluesky-social/indigo/xrpc"
	lru "github.com/hashicorp/golang-lru/v2"
	ipld "github.com/ipfs/go-ipld-format"
	"github.com/labstack/echo/v4"
	"github.com/labstack/echo/v4/middleware"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
	"gorm.io/gorm"
)

var tracer = otel.Tracer("archiver")

type Archiver struct {
	Index   *Indexer
	db      *gorm.DB
	slurper *bgs.Slurper
	didr    did.Resolver

	hr handles.HandleResolver

	// TODO: work on doing away with this flag in favor of more pluggable
	// pieces that abstract the need for explicit ssl checks
	ssl bool

	crawlOnly bool

	// TODO: at some point we will want to lock specific DIDs, this lock as is
	// is overly broad, but i dont expect it to be a bottleneck for now
	extUserLk sync.Mutex

	repoman *repomgr.RepoManager

	// Management of Socket Consumers
	consumersLk    sync.RWMutex
	nextConsumerID uint64
	consumers      map[uint64]*bgs.SocketConsumer

	// Management of Resyncs
	pdsResyncsLk sync.RWMutex
	pdsResyncs   map[uint]*bgs.PDSResync

	// Management of Compaction
	compactor *bgs.Compactor

	// User cache
	userCache *lru.Cache[string, *User]

	// nextCrawlers gets forwarded POST /xrpc/com.atproto.sync.requestCrawl
	nextCrawlers []*url.URL
	httpClient   http.Client

	log *slog.Logger
}

type ArchiverConfig struct {
	SSL                  bool
	CompactInterval      time.Duration
	DefaultRepoLimit     int64
	ConcurrencyPerPDS    int64
	MaxQueuePerPDS       int64
	NumCompactionWorkers int

	// NextCrawlers gets forwarded POST /xrpc/com.atproto.sync.requestCrawl
	NextCrawlers []*url.URL
}

func DefaultArchiverConfig() *ArchiverConfig {
	return &ArchiverConfig{
		SSL:                  true,
		CompactInterval:      4 * time.Hour,
		DefaultRepoLimit:     100,
		ConcurrencyPerPDS:    100,
		MaxQueuePerPDS:       1_000,
		NumCompactionWorkers: 2,
	}
}

func NewArchiver(db *gorm.DB, ix *Indexer, repoman *repomgr.RepoManager, didr did.Resolver, rf *RepoFetcher, hr handles.HandleResolver, config *ArchiverConfig) (*Archiver, error) {
	if config == nil {
		config = DefaultArchiverConfig()
	}
	db.AutoMigrate(User{})
	db.AutoMigrate(AuthToken{})
	db.AutoMigrate(models.PDS{})
	db.AutoMigrate(models.DomainBan{})

	uc, _ := lru.New[string, *User](1_000_000)

	arc := &Archiver{
		Index: ix,
		db:    db,

		hr:      hr,
		repoman: repoman,
		didr:    didr,
		ssl:     config.SSL,

		consumersLk: sync.RWMutex{},
		consumers:   make(map[uint64]*bgs.SocketConsumer),

		pdsResyncs: make(map[uint]*bgs.PDSResync),

		userCache: uc,

		log: slog.Default().With("system", "archiver"),
	}

	ix.CreateExternalUser = arc.handleUserUpdate
	slOpts := bgs.DefaultSlurperOptions()
	slOpts.SSL = config.SSL
	slOpts.DefaultRepoLimit = config.DefaultRepoLimit
	slOpts.ConcurrencyPerPDS = config.ConcurrencyPerPDS
	slOpts.MaxQueuePerPDS = config.MaxQueuePerPDS
	s, err := bgs.NewSlurper(db, arc.handleFedEvent, slOpts)
	if err != nil {
		return nil, err
	}

	arc.slurper = s

	if err := arc.slurper.RestartAll(); err != nil {
		return nil, err
	}

	cOpts := bgs.DefaultCompactorOptions()
	cOpts.NumWorkers = config.NumCompactionWorkers
	compactor := bgs.NewCompactor(cOpts)
	compactor.RequeueInterval = config.CompactInterval
	// TODO: compactor shenanigans
	//compactor.Start(arc)
	arc.compactor = compactor

	arc.nextCrawlers = config.NextCrawlers
	arc.httpClient.Timeout = time.Second * 5

	return arc, nil
}

type User struct {
	gorm.Model
	ID          models.Uid     `gorm:"primarykey;index:idx_user_id_active,where:taken_down = false AND tombstoned = false"`
	Handle      sql.NullString `gorm:"index"`
	DisplayName string
	Did         string `gorm:"uniqueindex"`
	Following   int64
	Followers   int64
	Posts       int64
	Type        string
	PDS         uint
	ValidHandle bool `gorm:"default:true"`

	// TakenDown is set to true if the user in question has been taken down.
	// A user in this state will have all future events related to it dropped
	// and no data about this user will be served.
	TakenDown  bool
	Tombstoned bool

	// UpstreamStatus is the state of the user as reported by the upstream PDS
	UpstreamStatus string `gorm:"index"`

	lk sync.Mutex
}

func (u *User) SetTakenDown(v bool) {
	u.lk.Lock()
	defer u.lk.Unlock()
	u.TakenDown = v
}

func (u *User) GetTakenDown() bool {
	u.lk.Lock()
	defer u.lk.Unlock()
	return u.TakenDown
}

func (u *User) SetTombstoned(v bool) {
	u.lk.Lock()
	defer u.lk.Unlock()
	u.Tombstoned = v
}

func (u *User) GetTombstoned() bool {
	u.lk.Lock()
	defer u.lk.Unlock()
	return u.Tombstoned
}

func (u *User) SetUpstreamStatus(v string) {
	u.lk.Lock()
	defer u.lk.Unlock()
	u.UpstreamStatus = v
}

func (u *User) GetUpstreamStatus() string {
	u.lk.Lock()
	defer u.lk.Unlock()
	return u.UpstreamStatus
}

func (s *Archiver) handleUserUpdate(ctx context.Context, did string) (*User, error) {
	ctx, span := tracer.Start(ctx, "handleUserUpdate")
	defer span.End()

	externalUserCreationAttempts.Inc()

	s.log.Debug("create external user", "did", did)
	doc, err := s.didr.GetDocument(ctx, did)
	if err != nil {
		return nil, fmt.Errorf("could not locate DID document for followed user (%s): %w", did, err)
	}

	if len(doc.Service) == 0 {
		return nil, fmt.Errorf("external followed user %s had no services in did document", did)
	}

	svc := doc.Service[0]
	durl, err := url.Parse(svc.ServiceEndpoint)
	if err != nil {
		return nil, err
	}

	if strings.HasPrefix(durl.Host, "localhost:") {
		durl.Scheme = "http"
	}

	// TODO: the PDS's DID should also be in the service, we could use that to look up?
	var peering models.PDS
	if err := s.db.Find(&peering, "host = ?", durl.Host).Error; err != nil {
		s.log.Error("failed to find pds", "host", durl.Host)
		return nil, err
	}

	/*
		// TODO: ignore this because we're just gonna get the stream from the relay anyways
		ban, err := s.domainIsBanned(ctx, durl.Host)
		if err != nil {
			return nil, fmt.Errorf("failed to check pds ban status: %w", err)
		}

		if ban {
			return nil, fmt.Errorf("cannot create user on pds with banned domain")
		}
	*/

	c := &xrpc.Client{Host: durl.String()}
	s.Index.ApplyPDSClientSettings(c)

	if peering.ID == 0 {
		// TODO: the case of handling a new user on a new PDS probably requires more thought
		cfg, err := atproto.ServerDescribeServer(ctx, c)
		if err != nil {
			// TODO: failing this shouldn't halt our indexing
			return nil, fmt.Errorf("failed to check unrecognized pds: %w", err)
		}

		// since handles can be anything, checking against this list doesn't matter...
		_ = cfg

		// TODO: could check other things, a valid response is good enough for now
		peering.Host = durl.Host
		peering.SSL = (durl.Scheme == "https")
		peering.CrawlRateLimit = float64(s.slurper.DefaultCrawlLimit)
		peering.RateLimit = float64(s.slurper.DefaultPerSecondLimit)
		peering.HourlyEventLimit = s.slurper.DefaultPerHourLimit
		peering.DailyEventLimit = s.slurper.DefaultPerDayLimit
		peering.RepoLimit = s.slurper.DefaultRepoLimit

		if s.ssl && !peering.SSL {
			return nil, fmt.Errorf("did references non-ssl PDS, this is disallowed in prod: %q %q", did, svc.ServiceEndpoint)
		}

		if err := s.db.Create(&peering).Error; err != nil {
			return nil, err
		}
	}

	if peering.ID == 0 {
		panic("somehow failed to create a pds entry?")
	}

	if peering.Blocked {
		return nil, fmt.Errorf("refusing to create user with blocked PDS")
	}

	if peering.RepoCount >= peering.RepoLimit {
		return nil, fmt.Errorf("refusing to create user on PDS at max repo limit for pds %q", peering.Host)
	}

	// Increment the repo count for the PDS
	res := s.db.Model(&models.PDS{}).Where("id = ? AND repo_count < repo_limit", peering.ID).Update("repo_count", gorm.Expr("repo_count + 1"))
	if res.Error != nil {
		return nil, fmt.Errorf("failed to increment repo count for pds %q: %w", peering.Host, res.Error)
	}

	if res.RowsAffected == 0 {
		return nil, fmt.Errorf("refusing to create user on PDS at max repo limit for pds %q", peering.Host)
	}

	successfullyCreated := false

	// Release the count if we fail to create the user
	defer func() {
		if !successfullyCreated {
			if err := s.db.Model(&models.PDS{}).Where("id = ?", peering.ID).Update("repo_count", gorm.Expr("repo_count - 1")).Error; err != nil {
				s.log.Error("failed to decrement repo count for pds", "err", err)
			}
		}
	}()

	if len(doc.AlsoKnownAs) == 0 {
		return nil, fmt.Errorf("user has no 'known as' field in their DID document")
	}

	hurl, err := url.Parse(doc.AlsoKnownAs[0])
	if err != nil {
		return nil, err
	}

	s.log.Debug("creating external user", "did", did, "handle", hurl.Host, "pds", peering.ID)

	handle := hurl.Host

	validHandle := true

	resdid, err := s.hr.ResolveHandleToDid(ctx, handle)
	if err != nil {
		s.log.Error("failed to resolve users claimed handle on pds", "handle", handle, "err", err)
		validHandle = false
	}

	if resdid != did {
		s.log.Error("claimed handle did not match servers response", "resdid", resdid, "did", did)
		validHandle = false
	}

	s.extUserLk.Lock()
	defer s.extUserLk.Unlock()

	exu, err := s.Index.LookupUserByDid(ctx, did)
	if err == nil {
		s.log.Debug("lost the race to create a new user", "did", did, "handle", handle, "existing_hand", exu.Handle)
		if exu.PDS != peering.ID {
			// User is now on a different PDS, update
			if err := s.db.Model(User{}).Where("id = ?", exu.ID).Update("pds", peering.ID).Error; err != nil {
				return nil, fmt.Errorf("failed to update users pds: %w", err)
			}

			exu.PDS = peering.ID
		}

		if exu.Handle.String != handle {
			// Users handle has changed, update
			if err := s.db.Model(User{}).Where("id = ?", exu.ID).Update("handle", handle).Error; err != nil {
				return nil, fmt.Errorf("failed to update users handle: %w", err)
			}

			exu.Handle = sql.NullString{String: handle, Valid: true}
		}
		return exu, nil
	}

	if !errors.Is(err, gorm.ErrRecordNotFound) {
		return nil, err
	}

	// TODO: request this users info from their server to fill out our data...
	u := User{
		Did:         did,
		PDS:         peering.ID,
		ValidHandle: validHandle,
	}
	if validHandle {
		u.Handle = sql.NullString{String: handle, Valid: true}
	}

	if err := s.db.Create(&u).Error; err != nil {
		// If the new user's handle conflicts with an existing user,
		// since we just validated the handle for this user, we'll assume
		// the existing user no longer has control of the handle
		if errors.Is(err, gorm.ErrDuplicatedKey) {
			// Get the UID of the existing user
			var existingUser User
			if err := s.db.Find(&existingUser, "handle = ?", handle).Error; err != nil {
				return nil, fmt.Errorf("failed to find existing user: %w", err)
			}

			// Set the existing user's handle to NULL and set the valid_handle flag to false
			if err := s.db.Model(User{}).Where("id = ?", existingUser.ID).Update("handle", nil).Update("valid_handle", false).Error; err != nil {
				return nil, fmt.Errorf("failed to update outdated user's handle: %w", err)
			}

			// Create the new user
			if err := s.db.Create(&u).Error; err != nil {
				return nil, fmt.Errorf("failed to create user after handle conflict: %w", err)
			}

			s.userCache.Remove(did)
		} else {
			return nil, fmt.Errorf("failed to create other pds user: %w", err)
		}
	}

	successfullyCreated = true

	return &u, nil
}

func (s *Archiver) handleFedEvent(ctx context.Context, host *models.PDS, env *events.XRPCStreamEvent) error {
	ctx, span := tracer.Start(ctx, "handleFedEvent")
	defer span.End()

	start := time.Now()
	defer func() {
		eventsHandleDuration.WithLabelValues(host.Host).Observe(time.Since(start).Seconds())
	}()

	eventsReceivedCounter.WithLabelValues(host.Host).Add(1)

	switch {
	case env.RepoCommit != nil:
		repoCommitsReceivedCounter.WithLabelValues(host.Host).Add(1)
		evt := env.RepoCommit
		s.log.Debug("archiver got repo append event", "seq", evt.Seq, "pdsHost", host.Host, "repo", evt.Repo)

		st := time.Now()
		u, err := s.lookupUserByDid(ctx, evt.Repo)
		userLookupDuration.Observe(time.Since(st).Seconds())
		if err != nil {
			if !errors.Is(err, gorm.ErrRecordNotFound) {
				repoCommitsResultCounter.WithLabelValues(host.Host, "nou").Inc()
				return fmt.Errorf("looking up event user: %w", err)
			}

			newUsersDiscovered.Inc()
			start := time.Now()
			subj, err := s.handleUserUpdate(ctx, evt.Repo)
			newUserDiscoveryDuration.Observe(time.Since(start).Seconds())
			if err != nil {
				repoCommitsResultCounter.WithLabelValues(host.Host, "uerr").Inc()
				return fmt.Errorf("fed event create external user: %w", err)
			}

			u = subj
		}

		ustatus := u.GetUpstreamStatus()
		span.SetAttributes(attribute.String("upstream_status", ustatus))

		if u.GetTakenDown() || ustatus == events.AccountStatusTakendown {
			span.SetAttributes(attribute.Bool("taken_down_by_relay_admin", u.GetTakenDown()))
			s.log.Debug("dropping commit event from taken down user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
			repoCommitsResultCounter.WithLabelValues(host.Host, "tdu").Inc()
			return nil
		}

		if ustatus == events.AccountStatusSuspended {
			s.log.Debug("dropping commit event from suspended user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
			repoCommitsResultCounter.WithLabelValues(host.Host, "susu").Inc()
			return nil
		}

		if ustatus == events.AccountStatusDeactivated {
			s.log.Debug("dropping commit event from deactivated user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
			repoCommitsResultCounter.WithLabelValues(host.Host, "du").Inc()
			return nil
		}

		if evt.Rebase {
			repoCommitsResultCounter.WithLabelValues(host.Host, "rebase").Inc()
			return fmt.Errorf("rebase was true in event seq:%d,host:%s", evt.Seq, host.Host)
		}

		if host.ID != u.PDS && u.PDS != 0 {
			s.log.Warn("received event for repo from different pds than expected", "repo", evt.Repo, "expPds", u.PDS, "gotPds", host.Host)
			// Flush any cached DID documents for this user
			s.didr.FlushCacheFor(env.RepoCommit.Repo)

			subj, err := s.handleUserUpdate(ctx, evt.Repo)
			if err != nil {
				repoCommitsResultCounter.WithLabelValues(host.Host, "uerr2").Inc()
				return err
			}

			if subj.PDS != host.ID {
				repoCommitsResultCounter.WithLabelValues(host.Host, "noauth").Inc()
				return fmt.Errorf("event from non-authoritative pds")
			}
		}

		if u.GetTombstoned() {
			span.SetAttributes(attribute.Bool("tombstoned", true))
			// we've checked the authority of the users PDS, so reinstate the account
			if err := s.db.Model(&User{}).Where("id = ?", u.ID).UpdateColumn("tombstoned", false).Error; err != nil {
				repoCommitsResultCounter.WithLabelValues(host.Host, "tomb").Inc()
				return fmt.Errorf("failed to un-tombstone a user: %w", err)
			}
			u.SetTombstoned(false)

			ai, err := s.Index.LookupUser(ctx, u.ID)
			if err != nil {
				repoCommitsResultCounter.WithLabelValues(host.Host, "nou2").Inc()
				return fmt.Errorf("failed to look up user (tombstone recover): %w", err)
			}

			// Now a simple re-crawl should suffice to bring the user back online
			repoCommitsResultCounter.WithLabelValues(host.Host, "catchupt").Inc()
			return s.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt)
		}

		// skip the fast path for rebases or if the user is already in the slow path
		if s.Index.Crawler.RepoInSlowPath(ctx, u.ID) {
			ai, err := s.Index.LookupUser(ctx, u.ID)
			if err != nil {
				repoCommitsResultCounter.WithLabelValues(host.Host, "nou3").Inc()
				return fmt.Errorf("failed to look up user (slow path): %w", err)
			}

			// TODO: we currently do not handle events that get queued up
			// behind an already 'in progress' slow path event.
			// this is strictly less efficient than it could be, and while it
			// does 'work' (due to falling back to resyncing the repo), its
			// technically incorrect. Now that we have the parallel event
			// processor coming off of the pds stream, we should investigate
			// whether or not we even need this 'slow path' logic, as it makes
			// accounting for which events have been processed much harder
			repoCommitsResultCounter.WithLabelValues(host.Host, "catchup").Inc()
			return s.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt)
		}

		if err := s.repoman.HandleExternalUserEvent(ctx, host.ID, u.ID, u.Did, evt.Since, evt.Rev, evt.Blocks, evt.Ops); err != nil {
			if errors.Is(err, carstore.ErrRepoBaseMismatch) || ipld.IsNotFound(err) {
				ai, lerr := s.Index.LookupUser(ctx, u.ID)
				if lerr != nil {
					s.log.Warn("failed handling event, no user", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "commit", evt.Commit.String())
					repoCommitsResultCounter.WithLabelValues(host.Host, "nou4").Inc()
					return fmt.Errorf("failed to look up user %s (%d) (err case: %s): %w", u.Did, u.ID, err, lerr)
				}

				span.SetAttributes(attribute.Bool("catchup_queue", true))

				s.log.Info("failed handling event, catchup", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "commit", evt.Commit.String())
				repoCommitsResultCounter.WithLabelValues(host.Host, "catchup2").Inc()
				return s.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt)
			}

			s.log.Warn("failed handling event", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "commit", evt.Commit.String())
			repoCommitsResultCounter.WithLabelValues(host.Host, "err").Inc()
			return fmt.Errorf("handle user event failed: %w", err)
		}

		repoCommitsResultCounter.WithLabelValues(host.Host, "ok").Inc()
		return nil
	case env.RepoIdentity != nil:
		s.log.Info("archiver got identity event", "did", env.RepoIdentity.Did)
		// Flush any cached DID documents for this user
		s.didr.FlushCacheFor(env.RepoIdentity.Did)

		// Refetch the DID doc and update our cached keys and handle etc.
		_, err := s.handleUserUpdate(ctx, env.RepoIdentity.Did)
		if err != nil {
			return err
		}

		return nil
	case env.RepoAccount != nil:
		span.SetAttributes(
			attribute.String("did", env.RepoAccount.Did),
			attribute.Int64("seq", env.RepoAccount.Seq),
			attribute.Bool("active", env.RepoAccount.Active),
		)

		if env.RepoAccount.Status != nil {
			span.SetAttributes(attribute.String("repo_status", *env.RepoAccount.Status))
		}

		s.log.Info("archiver got account event", "did", env.RepoAccount.Did)
		// Flush any cached DID documents for this user
		s.didr.FlushCacheFor(env.RepoAccount.Did)

		// Refetch the DID doc to make sure the PDS is still authoritative
		ai, err := s.handleUserUpdate(ctx, env.RepoAccount.Did)
		if err != nil {
			span.RecordError(err)
			return err
		}

		// Check if the PDS is still authoritative
		// if not we don't want to be propagating this account event
		if ai.PDS != host.ID {
			s.log.Error("account event from non-authoritative pds",
				"seq", env.RepoAccount.Seq,
				"did", env.RepoAccount.Did,
				"event_from", host.Host,
				"did_doc_declared_pds", ai.PDS,
				"account_evt", env.RepoAccount,
			)
			return fmt.Errorf("event from non-authoritative pds")
		}

		// Process the account status change
		repoStatus := events.AccountStatusActive
		if !env.RepoAccount.Active && env.RepoAccount.Status != nil {
			repoStatus = *env.RepoAccount.Status
		}

		err = s.UpdateAccountStatus(ctx, env.RepoAccount.Did, repoStatus)
		if err != nil {
			span.RecordError(err)
			return fmt.Errorf("failed to update account status: %w", err)
		}

		return nil
	default:
		return fmt.Errorf("invalid fed event")
	}
}

func (s *Archiver) UpdateAccountStatus(ctx context.Context, did string, status string) error {
	ctx, span := tracer.Start(ctx, "UpdateAccountStatus")
	defer span.End()

	span.SetAttributes(
		attribute.String("did", did),
		attribute.String("status", status),
	)

	u, err := s.lookupUserByDid(ctx, did)
	if err != nil {
		return err
	}

	switch status {
	case events.AccountStatusActive:
		// Unset the PDS-specific status flags
		if err := s.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusActive).Error; err != nil {
			return fmt.Errorf("failed to set user active status: %w", err)
		}
		u.SetUpstreamStatus(events.AccountStatusActive)
	case events.AccountStatusDeactivated:
		if err := s.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusDeactivated).Error; err != nil {
			return fmt.Errorf("failed to set user deactivation status: %w", err)
		}
		u.SetUpstreamStatus(events.AccountStatusDeactivated)
	case events.AccountStatusSuspended:
		if err := s.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusSuspended).Error; err != nil {
			return fmt.Errorf("failed to set user suspension status: %w", err)
		}
		u.SetUpstreamStatus(events.AccountStatusSuspended)
	case events.AccountStatusTakendown:
		if err := s.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusTakendown).Error; err != nil {
			return fmt.Errorf("failed to set user taken down status: %w", err)
		}
		u.SetUpstreamStatus(events.AccountStatusTakendown)

		if err := s.db.Model(&models.ActorInfo{}).Where("uid = ?", u.ID).UpdateColumns(map[string]any{
			"handle": nil,
		}).Error; err != nil {
			return err
		}
	case events.AccountStatusDeleted:
		if err := s.db.Model(&User{}).Where("id = ?", u.ID).UpdateColumns(map[string]any{
			"tombstoned":      true,
			"handle":          nil,
			"upstream_status": events.AccountStatusDeleted,
		}).Error; err != nil {
			return err
		}
		u.SetUpstreamStatus(events.AccountStatusDeleted)

		if err := s.db.Model(&models.ActorInfo{}).Where("uid = ?", u.ID).UpdateColumns(map[string]any{
			"handle": nil,
		}).Error; err != nil {
			return err
		}

		// delete data from carstore
		if err := s.repoman.TakeDownRepo(ctx, u.ID); err != nil {
			// don't let a failure here prevent us from propagating this event
			s.log.Error("failed to delete user data from carstore", "err", err)
		}
	}

	return nil
}

func (s *Archiver) lookupUserByDid(ctx context.Context, did string) (*User, error) {
	ctx, span := tracer.Start(ctx, "lookupUserByDid")
	defer span.End()

	cu, ok := s.userCache.Get(did)
	if ok {
		return cu, nil
	}

	var u User
	if err := s.db.Find(&u, "did = ?", did).Error; err != nil {
		return nil, err
	}

	if u.ID == 0 {
		return nil, gorm.ErrRecordNotFound
	}

	s.userCache.Add(did, &u)

	return &u, nil
}

func (s *Archiver) lookupUserByUID(ctx context.Context, uid models.Uid) (*User, error) {
	ctx, span := tracer.Start(ctx, "lookupUserByUID")
	defer span.End()

	var u User
	if err := s.db.Find(&u, "id = ?", uid).Error; err != nil {
		return nil, err
	}

	if u.ID == 0 {
		return nil, gorm.ErrRecordNotFound
	}

	return &u, nil
}

type AuthToken struct {
	gorm.Model
	Token string `gorm:"index"`
}

func (s *Archiver) lookupAdminToken(tok string) (bool, error) {
	var at AuthToken
	if err := s.db.Find(&at, "token = ?", tok).Error; err != nil {
		return false, err
	}

	if at.ID == 0 {
		return false, nil
	}

	return true, nil
}

func (s *Archiver) CreateAdminToken(tok string) error {
	exists, err := s.lookupAdminToken(tok)
	if err != nil {
		return err
	}

	if exists {
		return nil
	}

	return s.db.Create(&AuthToken{
		Token: tok,
	}).Error
}

func (s *Archiver) StartMetrics(listen string) error {
	http.Handle("/metrics", promhttp.Handler())
	return http.ListenAndServe(listen, nil)
}

const serverListenerBootTimeout = 5 * time.Second

func (s *Archiver) Start(addr string) error {
	var lc net.ListenConfig
	ctx, cancel := context.WithTimeout(context.Background(), serverListenerBootTimeout)
	defer cancel()

	li, err := lc.Listen(ctx, "tcp", addr)
	if err != nil {
		return err
	}
	return s.StartWithListener(li)
}

func (s *Archiver) StartWithListener(listen net.Listener) error {
	e := echo.New()
	e.HideBanner = true

	e.Use(middleware.CORSWithConfig(middleware.CORSConfig{
		AllowOrigins: []string{"*"},
		AllowHeaders: []string{echo.HeaderOrigin, echo.HeaderContentType, echo.HeaderAccept, echo.HeaderAuthorization},
	}))

	if !s.ssl {
		e.Use(middleware.LoggerWithConfig(middleware.LoggerConfig{
			Format: "method=${method}, uri=${uri}, status=${status} latency=${latency_human}\n",
		}))
	} else {
		e.Use(middleware.LoggerWithConfig(middleware.DefaultLoggerConfig))
	}

	// React uses a virtual router, so we need to serve the index.html for all
	// routes that aren't otherwise handled or in the /assets directory.
	e.File("/dash", "public/index.html")
	e.File("/dash/*", "public/index.html")
	e.Static("/assets", "public/assets")

	e.Use(svcutil.MetricsMiddleware)

	e.HTTPErrorHandler = func(err error, ctx echo.Context) {
		switch err := err.(type) {
		case *echo.HTTPError:
			if err2 := ctx.JSON(err.Code, map[string]any{
				"error": err.Message,
			}); err2 != nil {
				s.log.Error("Failed to write http error", "err", err2)
			}
		default:
			sendHeader := true
			if ctx.Path() == "/xrpc/com.atproto.sync.subscribeRepos" {
				sendHeader = false
			}

			s.log.Warn("handler error", "path", ctx.Path(), "err", err)

			if strings.HasPrefix(ctx.Path(), "/admin/") {
				ctx.JSON(500, map[string]any{
					"error": err.Error(),
				})
				return
			}

			if sendHeader {
				ctx.Response().WriteHeader(500)
			}
		}
	}

	// TODO: this API is temporary until we formalize what we want here

	e.GET("/xrpc/com.atproto.sync.getRepo", s.HandleComAtprotoSyncGetRepo)
	//e.GET("/xrpc/com.atproto.sync.listRepos", s.HandleComAtprotoSyncListRepos)
	//e.GET("/xrpc/com.atproto.sync.getLatestCommit", s.HandleComAtprotoSyncGetLatestCommit)
	e.GET("/xrpc/_health", s.HandleHealthCheck)
	e.GET("/_health", s.HandleHealthCheck)
	e.GET("/", s.HandleHomeMessage)

	/*
		admin := e.Group("/admin", s.checkAdminAuth)

		// Slurper-related Admin API
		admin.GET("/subs/getUpstreamConns", s.handleAdminGetUpstreamConns)
		admin.GET("/subs/getEnabled", s.handleAdminGetSubsEnabled)
		admin.GET("/subs/perDayLimit", s.handleAdminGetNewPDSPerDayRateLimit)
		admin.POST("/subs/setEnabled", s.handleAdminSetSubsEnabled)
		admin.POST("/subs/killUpstream", s.handleAdminKillUpstreamConn)
		admin.POST("/subs/setPerDayLimit", s.handleAdminSetNewPDSPerDayRateLimit)

		// Domain-related Admin API
		admin.GET("/subs/listDomainBans", s.handleAdminListDomainBans)
		admin.POST("/subs/banDomain", s.handleAdminBanDomain)
		admin.POST("/subs/unbanDomain", s.handleAdminUnbanDomain)

		// Repo-related Admin API
		admin.POST("/repo/takeDown", s.handleAdminTakeDownRepo)
		admin.POST("/repo/reverseTakedown", s.handleAdminReverseTakedown)
		admin.GET("/repo/takedowns", s.handleAdminListRepoTakeDowns)
		admin.POST("/repo/compact", s.handleAdminCompactRepo)
		admin.POST("/repo/compactAll", s.handleAdminCompactAllRepos)
		admin.POST("/repo/reset", s.handleAdminResetRepo)
		admin.POST("/repo/verify", s.handleAdminVerifyRepo)

		// PDS-related Admin API
		admin.POST("/pds/requestCrawl", s.handleAdminRequestCrawl)
		admin.GET("/pds/list", s.handleListPDSs)
		admin.POST("/pds/resync", s.handleAdminPostResyncPDS)
		admin.GET("/pds/resync", s.handleAdminGetResyncPDS)
		admin.POST("/pds/changeLimits", s.handleAdminChangePDSRateLimits)
		admin.POST("/pds/block", s.handleBlockPDS)
		admin.POST("/pds/unblock", s.handleUnblockPDS)
		admin.POST("/pds/addTrustedDomain", s.handleAdminAddTrustedDomain)

		// Consumer-related Admin API
		admin.GET("/consumers/list", s.handleAdminListConsumers)
	*/

	// In order to support booting on random ports in tests, we need to tell the
	// Echo instance it's already got a port, and then use its StartServer
	// method to re-use that listener.
	e.Listener = listen
	srv := &http.Server{}
	return e.StartServer(srv)
}

func (s *Archiver) Shutdown() []error {
	errs := s.slurper.Shutdown()

	s.compactor.Shutdown()

	return errs
}
```
archiver/crawler.go (+314)
```go
package archiver

import (
	"context"
	"fmt"
	"log/slog"
	"sync"
	"time"

	comatproto "github.com/bluesky-social/indigo/api/atproto"
	"github.com/bluesky-social/indigo/models"

	"go.opentelemetry.io/otel"
)

type CrawlDispatcher struct {
	// from Crawl()
	ingest chan *User

	// from AddToCatchupQueue()
	catchup chan *crawlWork

	// from main loop to fetchWorker()
	repoSync chan *crawlWork

	complete chan models.Uid

	maplk      sync.Mutex
	todo       map[models.Uid]*crawlWork
	inProgress map[models.Uid]*crawlWork

	repoFetcher CrawlRepoFetcher

	concurrency int

	log *slog.Logger

	done chan struct{}
}

// this is what we need of RepoFetcher
type CrawlRepoFetcher interface {
	FetchAndIndexRepo(ctx context.Context, job *crawlWork) error
}

func NewCrawlDispatcher(repoFetcher CrawlRepoFetcher, concurrency int, log *slog.Logger) (*CrawlDispatcher, error) {
	if concurrency < 1 {
		return nil, fmt.Errorf("must specify a non-zero positive integer for crawl dispatcher concurrency")
	}

	out := &CrawlDispatcher{
		ingest:      make(chan *User),
		repoSync:    make(chan *crawlWork),
		complete:    make(chan models.Uid),
		catchup:     make(chan *crawlWork),
		repoFetcher: repoFetcher,
		concurrency: concurrency,
		todo:        make(map[models.Uid]*crawlWork),
		inProgress:  make(map[models.Uid]*crawlWork),
		log:         log,
		done:        make(chan struct{}),
	}
	go out.CatchupRepoGaugePoller()

	return out, nil
}

func (c *CrawlDispatcher) Run() {
	go c.mainLoop()

	for i := 0; i < c.concurrency; i++ {
		go c.fetchWorker()
	}
}

func (c *CrawlDispatcher) Shutdown() {
	close(c.done)
}

type catchupJob struct {
	evt  *comatproto.SyncSubscribeRepos_Commit
	host *models.PDS
	user *User
}

type crawlWork struct {
	act        *User
	initScrape bool

	// for events that come in while this actor's crawl is enqueued
	// catchup items are processed during the crawl
	catchup []*catchupJob

	// for events that come in while this actor is being processed
	// next items are processed after the crawl
	next []*catchupJob
}

func (c *CrawlDispatcher) mainLoop() {
	var nextDispatchedJob *crawlWork
	var jobsAwaitingDispatch []*crawlWork

	// dispatchQueue represents the repoSync worker channel to which we dispatch crawl work
	var dispatchQueue chan *crawlWork

	for {
		select {
		case actorToCrawl := <-c.ingest:
			// TODO: max buffer size
			crawlJob := c.enqueueJobForActor(actorToCrawl)
			if crawlJob == nil {
				break
			}

			if nextDispatchedJob == nil {
				nextDispatchedJob = crawlJob
				dispatchQueue = c.repoSync
			} else {
				jobsAwaitingDispatch = append(jobsAwaitingDispatch, crawlJob)
			}
		case dispatchQueue <- nextDispatchedJob:
			c.dequeueJob(nextDispatchedJob)

			if len(jobsAwaitingDispatch) > 0 {
				nextDispatchedJob = jobsAwaitingDispatch[0]
				jobsAwaitingDispatch = jobsAwaitingDispatch[1:]
			} else {
				nextDispatchedJob = nil
				dispatchQueue = nil
			}
		case catchupJob := <-c.catchup:
			// Catchup jobs are for processing events that come in while a crawl is in progress.
			// They are lower priority than new crawls, so we only dispatch one directly if
			// there isn't already a job waiting to be dispatched.
			if nextDispatchedJob == nil {
				nextDispatchedJob = catchupJob
				dispatchQueue = c.repoSync
			} else {
				jobsAwaitingDispatch = append(jobsAwaitingDispatch, catchupJob)
			}
		case uid := <-c.complete:
			c.maplk.Lock()

			job, ok := c.inProgress[uid]
			if !ok {
				panic("received a completion signal for a job that was not in progress")
			}
			delete(c.inProgress, uid)

			// If there are any subsequent jobs for this UID, add it back to the todo list or buffer.
			// We're basically pumping the `next` queue into the `catchup` queue and will do this over and over until the `next` queue is empty.
			if len(job.next) > 0 {
				c.todo[uid] = job
				job.initScrape = false
				job.catchup = job.next
				job.next = nil
				if nextDispatchedJob == nil {
					nextDispatchedJob = job
					dispatchQueue = c.repoSync
				} else {
					jobsAwaitingDispatch = append(jobsAwaitingDispatch, job)
				}
			}
			c.maplk.Unlock()
		}
	}
}

// enqueueJobForActor adds a new crawl job to the todo list if there isn't already a job in progress for this actor
func (c *CrawlDispatcher) enqueueJobForActor(ai *User) *crawlWork {
	c.maplk.Lock()
	defer c.maplk.Unlock()
	_, ok := c.inProgress[ai.ID]
	if ok {
		return nil
	}

	_, has := c.todo[ai.ID]
	if has {
		return nil
	}

	crawlJob := &crawlWork{
		act:        ai,
		initScrape: true,
	}
	c.todo[ai.ID] = crawlJob
	return crawlJob
}

// dequeueJob removes a job from the todo list and adds it to the inProgress list
func (c *CrawlDispatcher) dequeueJob(job *crawlWork) {
	c.maplk.Lock()
	defer c.maplk.Unlock()
	delete(c.todo, job.act.ID)
	c.inProgress[job.act.ID] = job
}

func (c *CrawlDispatcher) addToCatchupQueue(catchup *catchupJob) *crawlWork {
	c.maplk.Lock()
	defer c.maplk.Unlock()

	// If the actor crawl is enqueued, we can append to the catchup queue which gets emptied during the crawl
	job, ok := c.todo[catchup.user.ID]
	if ok {
		catchupEventsEnqueued.WithLabelValues("todo").Inc()
		job.catchup = append(job.catchup, catchup)
		return nil
	}

	// If the actor crawl is in progress, we can append to the next queue which gets emptied after the crawl
	job, ok = c.inProgress[catchup.user.ID]
	if ok {
		catchupEventsEnqueued.WithLabelValues("prog").Inc()
		job.next = append(job.next, catchup)
		return nil
	}

	catchupEventsEnqueued.WithLabelValues("new").Inc()
	// Otherwise, we need to create a new crawl job for this actor and enqueue it
	cw := &crawlWork{
		act:     catchup.user,
		catchup: []*catchupJob{catchup},
	}
	c.todo[catchup.user.ID] = cw
	return cw
}

func (c *CrawlDispatcher) fetchWorker() {
	for {
		select {
		case job := <-c.repoSync:
			if err := c.repoFetcher.FetchAndIndexRepo(context.TODO(), job); err != nil {
				c.log.Error("failed to perform repo crawl", "did", job.act.Did, "err", err)
			}

			// TODO: do we still just do this if it errors?
			c.complete <- job.act.ID
		}
	}
}

func (c *CrawlDispatcher) Crawl(ctx context.Context, ai *User) error {
	if ai.PDS == 0 {
		panic("must have pds for user in queue")
	}

	userCrawlsEnqueued.Inc()

	ctx, span := otel.Tracer("crawler").Start(ctx, "addToCrawler")
	defer span.End()

	select {
	case c.ingest <- ai:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func (c *CrawlDispatcher) AddToCatchupQueue(ctx context.Context, host *models.PDS, u *User, evt *comatproto.SyncSubscribeRepos_Commit) error {
	if u.PDS == 0 {
		panic("must have pds for user in queue")
	}

	catchup := &catchupJob{
		evt:  evt,
		host: host,
		user: u,
	}

	cw := c.addToCatchupQueue(catchup)
	if cw == nil {
		return nil
	}

	select {
	case c.catchup <- cw:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func (c *CrawlDispatcher) RepoInSlowPath(ctx context.Context, uid models.Uid) bool {
	c.maplk.Lock()
	defer c.maplk.Unlock()
	if _, ok := c.todo[uid]; ok {
		return true
	}

	if _, ok := c.inProgress[uid]; ok {
		return true
	}

	return false
}

func (c *CrawlDispatcher) countReposInSlowPath() int {
	c.maplk.Lock()
	defer c.maplk.Unlock()
	return len(c.inProgress) + len(c.todo)
}

func (c *CrawlDispatcher) CatchupRepoGaugePoller() {
	ticker := time.NewTicker(30 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-c.done:
			return
		case <-ticker.C:
			catchupReposGauge.Set(float64(c.countReposInSlowPath()))
		}
	}
}
```
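CrawlRepoFetcher is the dispatcher's only dependency, so the dispatcher can be exercised on its own. A hypothetical in-package stub, not part of this commit:

```go
// recordingFetcher is a hypothetical stub satisfying CrawlRepoFetcher.
type recordingFetcher struct {
	mu   sync.Mutex
	dids []string
}

func (f *recordingFetcher) FetchAndIndexRepo(ctx context.Context, job *crawlWork) error {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.dids = append(f.dids, job.act.Did)
	return nil
}

// Example use; note Crawl panics on a zero PDS id, so the stub user sets one:
//
//	d, _ := NewCrawlDispatcher(&recordingFetcher{}, 4, slog.Default())
//	d.Run()
//	_ = d.Crawl(context.Background(), &User{ID: 1, PDS: 1, Did: "did:plc:example"})
```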
archiver/handlers.go (+88)
```go
package archiver

import (
	"errors"
	"fmt"
	"net/http"

	"github.com/bluesky-social/indigo/atproto/syntax"
	"github.com/bluesky-social/indigo/bgs"
	"github.com/bluesky-social/indigo/events"
	"github.com/labstack/echo/v4"
	"go.opentelemetry.io/otel"
	"gorm.io/gorm"
)

func (s *Archiver) HandleComAtprotoSyncGetRepo(c echo.Context) error {
	ctx, span := otel.Tracer("server").Start(c.Request().Context(), "HandleComAtprotoSyncGetRepo")
	defer span.End()
	did := c.QueryParam("did")
	since := c.QueryParam("since")

	_, err := syntax.ParseDID(did)
	if err != nil {
		return c.JSON(http.StatusBadRequest, bgs.XRPCError{Message: fmt.Sprintf("invalid did: %s", did)})
	}

	c.Response().Header().Set(echo.HeaderContentType, "application/vnd.ipld.car")

	u, err := s.lookupUserByDid(ctx, did)
	if err != nil {
		if errors.Is(err, gorm.ErrRecordNotFound) {
			return echo.NewHTTPError(http.StatusNotFound, "user not found")
		}
		s.log.Error("failed to lookup user", "err", err, "did", did)
		return echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user")
	}

	if u.GetTombstoned() {
		return fmt.Errorf("account was deleted")
	}

	if u.GetTakenDown() {
		return fmt.Errorf("account was taken down by the Relay")
	}

	ustatus := u.GetUpstreamStatus()
	if ustatus == events.AccountStatusTakendown {
		return fmt.Errorf("account was taken down by its PDS")
	}

	if ustatus == events.AccountStatusDeactivated {
		return fmt.Errorf("account is temporarily deactivated")
	}

	if ustatus == events.AccountStatusSuspended {
		return fmt.Errorf("account is suspended by its PDS")
	}

	if err := s.repoman.ReadRepo(ctx, u.ID, since, c.Response()); err != nil {
		s.log.Error("failed to stream repo", "err", err, "did", did)
		return echo.NewHTTPError(http.StatusInternalServerError, "failed to stream repo")
	}

	return nil
}

type HealthStatus struct {
	Status  string `json:"status"`
	Message string `json:"msg,omitempty"`
}

func (s *Archiver) HandleHealthCheck(c echo.Context) error {
	if err := s.db.Exec("SELECT 1").Error; err != nil {
		s.log.Error("healthcheck can't connect to database", "err", err)
		return c.JSON(500, HealthStatus{Status: "error", Message: "can't connect to database"})
	}
	return c.JSON(200, HealthStatus{Status: "ok"})
}

var homeMessage string = `
[ insert fancy archiver art here ]
`

func (s *Archiver) HandleHomeMessage(c echo.Context) error {
	return c.String(http.StatusOK, homeMessage)
}
```
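A hypothetical client-side helper, not part of this commit, showing what a call against the getRepo handler above looks like; the host address and DID are placeholders, and the usual io, net/http, net/url, and fmt imports are assumed.

```go
// fetchRepoCAR is a hypothetical helper; it assumes an archiver listening on
// localhost:2470 and returns the raw application/vnd.ipld.car bytes.
func fetchRepoCAR(did string) ([]byte, error) {
	resp, err := http.Get("http://localhost:2470/xrpc/com.atproto.sync.getRepo?did=" + url.QueryEscape(did))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("getRepo failed: %s", resp.Status)
	}
	return io.ReadAll(resp.Body)
}
```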
archiver/loader.go (+217)
```go
package archiver

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"time"

	comatproto "github.com/bluesky-social/indigo/api/atproto"
	"github.com/bluesky-social/indigo/did"
	"github.com/bluesky-social/indigo/events"
	lexutil "github.com/bluesky-social/indigo/lex/util"
	"github.com/bluesky-social/indigo/models"
	"github.com/bluesky-social/indigo/repomgr"
	"github.com/bluesky-social/indigo/util"
	"github.com/bluesky-social/indigo/xrpc"

	"go.opentelemetry.io/otel"
	"gorm.io/gorm"
)

const MaxEventSliceLength = 1000000
const MaxOpsSliceLength = 200

type Indexer struct {
	db *gorm.DB

	addEvent AddEventFunc
	didr     did.Resolver

	Crawler *CrawlDispatcher

	SendRemoteFollow       func(context.Context, string, uint) error
	CreateExternalUser     func(context.Context, string) (*User, error)
	ApplyPDSClientSettings func(*xrpc.Client)

	log *slog.Logger
}

type AddEventFunc func(ctx context.Context, ev *events.XRPCStreamEvent) error

func NewIndexer(db *gorm.DB, addEvent AddEventFunc, didr did.Resolver, fetcher *RepoFetcher, crawl bool) (*Indexer, error) {
	ix := &Indexer{
		db:       db,
		addEvent: addEvent,
		didr:     didr,
		SendRemoteFollow: func(context.Context, string, uint) error {
			return nil
		},
		ApplyPDSClientSettings: func(*xrpc.Client) {},
		log:                    slog.Default().With("system", "indexer"),
	}

	if crawl {
		c, err := NewCrawlDispatcher(fetcher, fetcher.MaxConcurrency, ix.log)
		if err != nil {
			return nil, err
		}

		ix.Crawler = c
		ix.Crawler.Run()
	}

	return ix, nil
}

func (ix *Indexer) Shutdown() {
	if ix.Crawler != nil {
		ix.Crawler.Shutdown()
	}
}

func (ix *Indexer) HandleRepoEvent(ctx context.Context, evt *repomgr.RepoEvent) error {
	ctx, span := otel.Tracer("indexer").Start(ctx, "HandleRepoEvent")
	defer span.End()

	ix.log.Debug("Handling Repo Event!", "uid", evt.User)

	outops := make([]*comatproto.SyncSubscribeRepos_RepoOp, 0, len(evt.Ops))
	for _, op := range evt.Ops {
		link := (*lexutil.LexLink)(op.RecCid)
		outops = append(outops, &comatproto.SyncSubscribeRepos_RepoOp{
			Path:   op.Collection + "/" + op.Rkey,
			Action: string(op.Kind),
			Cid:    link,
		})
	}

	did, err := ix.DidForUser(ctx, evt.User)
	if err != nil {
		return err
	}

	toobig := false
	slice := evt.RepoSlice
	if len(slice) > MaxEventSliceLength || len(outops) > MaxOpsSliceLength {
		slice = []byte{}
		outops = nil
		toobig = true
	}

	ix.log.Debug("Sending event", "did", did)
	if err := ix.addEvent(ctx, &events.XRPCStreamEvent{
		RepoCommit: &comatproto.SyncSubscribeRepos_Commit{
			Repo:   did,
			Blocks: slice,
			Rev:    evt.Rev,
			Since:  evt.Since,
			Commit: lexutil.LexLink(evt.NewRoot),
			Time:   time.Now().Format(util.ISO8601),
			Ops:    outops,
			TooBig: toobig,
		},
		PrivUid: evt.User,
	}); err != nil {
		return fmt.Errorf("failed to push event: %s", err)
	}

	return nil
}

func (ix *Indexer) GetUserOrMissing(ctx context.Context, did string) (*User, error) {
	ctx, span := otel.Tracer("indexer").Start(ctx, "getUserOrMissing")
	defer span.End()

	ai, err := ix.LookupUserByDid(ctx, did)
	if err == nil {
		return ai, nil
	}

	if !isNotFound(err) {
		return nil, err
	}

	// unknown user... create it and send it off to the crawler
	return ix.createMissingUserRecord(ctx, did)
}

func (ix *Indexer) createMissingUserRecord(ctx context.Context, did string) (*User, error) {
	ctx, span := otel.Tracer("indexer").Start(ctx, "createMissingUserRecord")
	defer span.End()

	externalUserCreationAttempts.Inc()

	ai, err := ix.CreateExternalUser(ctx, did)
	if err != nil {
		return nil, err
	}

	if err := ix.addUserToCrawler(ctx, ai); err != nil {
		return nil, fmt.Errorf("failed to add unknown user to crawler: %w", err)
	}

	return ai, nil
}

func (ix *Indexer) addUserToCrawler(ctx context.Context, ai *User) error {
	ix.log.Debug("Sending user to crawler", "did", ai.Did)
	if ix.Crawler == nil {
		return nil
	}

	return ix.Crawler.Crawl(ctx, ai)
}

func (ix *Indexer) DidForUser(ctx context.Context, uid models.Uid) (string, error) {
	var ai User
	if err := ix.db.First(&ai, "id = ?", uid).Error; err != nil {
		return "", err
	}

	return ai.Did, nil
}

func (ix *Indexer) LookupUser(ctx context.Context, id models.Uid) (*User, error) {
	var ai User
	if err := ix.db.First(&ai, "id = ?", id).Error; err != nil {
		return nil, err
	}

	return &ai, nil
}

func (ix *Indexer) LookupUserByDid(ctx context.Context, did string) (*User, error) {
	var ai User
	if err := ix.db.Find(&ai, "did = ?", did).Error; err != nil {
		return nil, err
	}

	if ai.ID == 0 {
		return nil, gorm.ErrRecordNotFound
	}

	return &ai, nil
}

func (ix *Indexer) LookupUserByHandle(ctx context.Context, handle string) (*User, error) {
	var ai User
	if err := ix.db.Find(&ai, "handle = ?", handle).Error; err != nil {
		return nil, err
	}

	if ai.ID == 0 {
		return nil, gorm.ErrRecordNotFound
	}

	return &ai, nil
}

func isNotFound(err error) bool {
	return errors.Is(err, gorm.ErrRecordNotFound)
}
```
archiver/metrics.go (+76)
```go
package archiver

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

var eventsReceivedCounter = promauto.NewCounterVec(prometheus.CounterOpts{
	Name: "arc_events_received_counter",
	Help: "The total number of events received",
}, []string{"pds"})

var eventsHandleDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
	Name:    "arc_events_handle_duration",
	Help:    "A histogram of handleFedEvent latencies",
	Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
}, []string{"pds"})

var repoCommitsReceivedCounter = promauto.NewCounterVec(prometheus.CounterOpts{
	Name: "arc_repo_commits_received_counter",
	Help: "The total number of repo commit events received",
}, []string{"pds"})

var repoCommitsResultCounter = promauto.NewCounterVec(prometheus.CounterOpts{
	Name: "arc_repo_commits_result_counter",
	Help: "The results of commit events received",
}, []string{"pds", "status"})

var externalUserCreationAttempts = promauto.NewCounter(prometheus.CounterOpts{
	Name: "archiver_external_user_creation_attempts",
	Help: "The total number of external user creation attempts",
})

var newUsersDiscovered = promauto.NewCounter(prometheus.CounterOpts{
	Name: "archiver_new_users_discovered",
	Help: "The total number of new users discovered directly from the firehose (not from refs)",
})

var userLookupDuration = promauto.NewHistogram(prometheus.HistogramOpts{
	Name:    "arc_user_lookup_duration",
	Help:    "A histogram of user lookup latencies",
	Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
})

var newUserDiscoveryDuration = promauto.NewHistogram(prometheus.HistogramOpts{
	Name:    "arc_new_user_discovery_duration",
	Help:    "A histogram of new user discovery latencies",
	Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
})

// From old indexer code

var userCrawlsEnqueued = promauto.NewCounter(prometheus.CounterOpts{
	Name: "arc_user_crawls_enqueued",
	Help: "Number of user crawls enqueued",
})

var reposFetched = promauto.NewCounterVec(prometheus.CounterOpts{
	Name: "arc_repos_fetched",
	Help: "Number of repos fetched",
}, []string{"status"})

var catchupEventsEnqueued = promauto.NewCounterVec(prometheus.CounterOpts{
	Name: "arc_catchup_events_enqueued",
	Help: "Number of catchup events enqueued",
}, []string{"how"})

var catchupEventsFailed = promauto.NewCounterVec(prometheus.CounterOpts{
	Name: "arc_catchup_events_failed",
	Help: "Number of catchup events that failed",
}, []string{"err"})

var catchupReposGauge = promauto.NewGauge(prometheus.GaugeOpts{
	Name: "arc_catchup_repos",
	Help: "Number of repos waiting on catchup",
})
```
archiver/repofetch.go (+128)
```go
package archiver

import (
	"bytes"
	"context"
	"fmt"
	"log/slog"
	"sync"

	"github.com/bluesky-social/indigo/api/atproto"
	"github.com/bluesky-social/indigo/models"
	"github.com/bluesky-social/indigo/repomgr"
	"github.com/bluesky-social/indigo/xrpc"
	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
	"golang.org/x/time/rate"
	"gorm.io/gorm"
)

func NewRepoFetcher(db *gorm.DB, rm *repomgr.RepoManager, maxConcurrency int) *RepoFetcher {
	return &RepoFetcher{
		repoman:                rm,
		db:                     db,
		Limiters:               make(map[uint]*rate.Limiter),
		ApplyPDSClientSettings: func(*xrpc.Client) {},
		MaxConcurrency:         maxConcurrency,
		log:                    slog.Default().With("system", "indexer"),
	}
}

type RepoFetcher struct {
	repoman *repomgr.RepoManager
	db      *gorm.DB

	Limiters map[uint]*rate.Limiter
	LimitMux sync.RWMutex

	MaxConcurrency int

	ApplyPDSClientSettings func(*xrpc.Client)

	log *slog.Logger
}

func (rf *RepoFetcher) GetLimiter(pdsID uint) *rate.Limiter {
	rf.LimitMux.RLock()
	defer rf.LimitMux.RUnlock()

	return rf.Limiters[pdsID]
}

func (rf *RepoFetcher) GetOrCreateLimiter(pdsID uint, pdsrate float64) *rate.Limiter {
	rf.LimitMux.Lock()
	defer rf.LimitMux.Unlock()

	lim, ok := rf.Limiters[pdsID]
	if !ok {
		lim = rate.NewLimiter(rate.Limit(pdsrate), 1)
		rf.Limiters[pdsID] = lim
	}

	return lim
}

func (rf *RepoFetcher) SetLimiter(pdsID uint, lim *rate.Limiter) {
	rf.LimitMux.Lock()
	defer rf.LimitMux.Unlock()

	rf.Limiters[pdsID] = lim
}

func (rf *RepoFetcher) fetchRepo(ctx context.Context, c *xrpc.Client, pds *models.PDS, did string, rev string) ([]byte, error) {
	ctx, span := otel.Tracer("indexer").Start(ctx, "fetchRepo")
	defer span.End()

	span.SetAttributes(
		attribute.String("pds", pds.Host),
		attribute.String("did", did),
		attribute.String("rev", rev),
	)

	limiter := rf.GetOrCreateLimiter(pds.ID, pds.CrawlRateLimit)

	// Wait to prevent DOSing the PDS when connecting to a new stream with lots of active repos
	limiter.Wait(ctx)

	rf.log.Debug("SyncGetRepo", "did", did, "since", rev)
	// TODO: max size on these? A malicious PDS could just send us a petabyte sized repo here and kill us
	repo, err := atproto.SyncGetRepo(ctx, c, did, rev)
	if err != nil {
		reposFetched.WithLabelValues("fail").Inc()
		return nil, fmt.Errorf("failed to fetch repo (did=%s,rev=%s,host=%s): %w", did, rev, pds.Host, err)
	}
	reposFetched.WithLabelValues("success").Inc()

	return repo, nil
}

// TODO: since this function is the only place we depend on the repomanager, i wonder if this should be wired some other way?
func (rf *RepoFetcher) FetchAndIndexRepo(ctx context.Context, job *crawlWork) error {
	ctx, span := otel.Tracer("indexer").Start(ctx, "FetchAndIndexRepo")
	defer span.End()

	span.SetAttributes(attribute.Int("catchup", len(job.catchup)))

	ai := job.act

	var pds models.PDS
	if err := rf.db.First(&pds, "id = ?", ai.PDS).Error; err != nil {
		catchupEventsFailed.WithLabelValues("nopds").Inc()
		return fmt.Errorf("expected to find pds record (%d) in db for crawling one of their users: %w", ai.PDS, err)
	}

	c := models.ClientForPds(&pds)
	rf.ApplyPDSClientSettings(c)

	repo, err := rf.fetchRepo(ctx, c, &pds, ai.Did, "")
	if err != nil {
		return err
	}

	if err := rf.repoman.ImportNewRepo(ctx, ai.ID, ai.Did, bytes.NewReader(repo), nil); err != nil {
		span.RecordError(err)
		return fmt.Errorf("failed to import backup repo (%s): %w", ai.Did, err)
	}

	return nil
}
```
bgs/bgs.go (+1 -1)
```diff
 	cOpts := DefaultCompactorOptions()
 	cOpts.NumWorkers = config.NumCompactionWorkers
 	compactor := NewCompactor(cOpts)
-	compactor.requeueInterval = config.CompactInterval
+	compactor.RequeueInterval = config.CompactInterval
 	compactor.Start(bgs)
 	bgs.compactor = compactor
```
+5 -5
bgs/compactor.go
··· 145 145 q *uniQueue 146 146 stateLk sync.RWMutex 147 147 exit chan struct{} 148 - requeueInterval time.Duration 148 + RequeueInterval time.Duration 149 149 requeueLimit int 150 150 requeueShardCount int 151 151 requeueFast bool ··· 182 182 members: make(map[models.Uid]struct{}), 183 183 }, 184 184 exit: make(chan struct{}), 185 - requeueInterval: opts.RequeueInterval, 185 + RequeueInterval: opts.RequeueInterval, 186 186 requeueLimit: opts.RequeueLimit, 187 187 requeueFast: opts.RequeueFast, 188 188 requeueShardCount: opts.RequeueShardCount, ··· 208 208 } 209 209 go c.doWork(bgs, strategy) 210 210 } 211 - if c.requeueInterval > 0 { 211 + if c.RequeueInterval > 0 { 212 212 go func() { 213 213 log.Info("starting compactor requeue routine", 214 - "interval", c.requeueInterval, 214 + "interval", c.RequeueInterval, 215 215 "limit", c.requeueLimit, 216 216 "shardCount", c.requeueShardCount, 217 217 "fast", c.requeueFast, 218 218 ) 219 219 220 - t := time.NewTicker(c.requeueInterval) 220 + t := time.NewTicker(c.RequeueInterval) 221 221 for { 222 222 select { 223 223 case <-c.exit:
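Exporting `requeueInterval` as `RequeueInterval` is what lets the bgs.go hunk above override the requeue ticker after `NewCompactor` has already copied its options struct. A toy sketch of that post-construction override pattern (the `Worker` type below is invented for illustration; the real Compactor guards on `RequeueInterval > 0` the same way):

```go
package main

import (
	"fmt"
	"time"
)

// Worker mimics the Compactor's shape: construction bakes in a default,
// but the exported RequeueInterval field can still be overridden before Start.
type Worker struct {
	RequeueInterval time.Duration
	exit            chan struct{}
}

func newWorker() *Worker {
	return &Worker{RequeueInterval: time.Hour, exit: make(chan struct{})}
}

func (w *Worker) Start() {
	if w.RequeueInterval <= 0 {
		return // scheduled requeue disabled, matching the `> 0` guard above
	}
	t := time.NewTicker(w.RequeueInterval)
	go func() {
		defer t.Stop()
		for {
			select {
			case <-w.exit:
				return
			case at := <-t.C:
				fmt.Println("requeue tick", at.Format(time.RFC3339Nano))
			}
		}
	}()
}

func main() {
	w := newWorker()
	w.RequeueInterval = 300 * time.Millisecond // caller override, as bgs.go now does with config.CompactInterval
	w.Start()
	time.Sleep(time.Second)
	close(w.exit)
}
```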
+20 -6
carstore/bs.go
··· 52 52 53 53 GetUserRepoHead(ctx context.Context, user models.Uid) (cid.Cid, error) 54 54 GetUserRepoRev(ctx context.Context, user models.Uid) (string, error) 55 - ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, *DeltaSession, error) 56 - NewDeltaSession(ctx context.Context, user models.Uid, since *string) (*DeltaSession, error) 57 - ReadOnlySession(user models.Uid) (*DeltaSession, error) 55 + ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, BlockStorage, error) 56 + NewDeltaSession(ctx context.Context, user models.Uid, since *string) (BlockStorage, error) 57 + ReadOnlySession(user models.Uid) (BlockStorage, error) 58 58 ReadUserCar(ctx context.Context, user models.Uid, sinceRev string, incremental bool, w io.Writer) error 59 59 Stat(ctx context.Context, usr models.Uid) ([]UserStat, error) 60 60 WipeUserData(ctx context.Context, user models.Uid) error 61 + } 62 + 63 + type BlockStorage interface { 64 + BaseCid() cid.Cid 65 + Put(ctx context.Context, b blockformat.Block) error 66 + PutMany(ctx context.Context, bs []blockformat.Block) error 67 + AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) 68 + DeleteBlock(ctx context.Context, c cid.Cid) error 69 + Get(ctx context.Context, c cid.Cid) (blockformat.Block, error) 70 + Has(ctx context.Context, c cid.Cid) (bool, error) 71 + HashOnRead(hor bool) 72 + GetSize(ctx context.Context, c cid.Cid) (int, error) 73 + CloseWithRoot(ctx context.Context, root cid.Cid, rev string) ([]byte, error) 74 + CalcDiff(ctx context.Context, skipcids map[cid.Cid]bool) error 61 75 } 62 76 63 77 type FileCarStore struct { ··· 318 332 319 333 var ErrRepoBaseMismatch = fmt.Errorf("attempted a delta session on top of the wrong previous head") 320 334 321 - func (cs *FileCarStore) NewDeltaSession(ctx context.Context, user models.Uid, since *string) (*DeltaSession, error) { 335 + func (cs *FileCarStore) NewDeltaSession(ctx context.Context, user models.Uid, since *string) (BlockStorage, error) { 322 336 ctx, span := otel.Tracer("carstore").Start(ctx, "NewSession") 323 337 defer span.End() 324 338 ··· 349 363 }, nil 350 364 } 351 365 352 - func (cs *FileCarStore) ReadOnlySession(user models.Uid) (*DeltaSession, error) { 366 + func (cs *FileCarStore) ReadOnlySession(user models.Uid) (BlockStorage, error) { 353 367 return &DeltaSession{ 354 368 base: &userView{ 355 369 user: user, ··· 768 782 return dropset, nil 769 783 } 770 784 771 - func (cs *FileCarStore) ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, *DeltaSession, error) { 785 + func (cs *FileCarStore) ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, BlockStorage, error) { 772 786 ctx, span := otel.Tracer("carstore").Start(ctx, "ImportSlice") 773 787 defer span.End() 774 788
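The new `BlockStorage` interface lifts what used to be the concrete `*DeltaSession` return type out of the `CarStore` interface, so alternative stores (see `repostore` below) can hand back their own session types. A cut-down illustration of the shape (the real interface also carries `BaseCid`, `CloseWithRoot`, `CalcDiff`, and the rest; the names below are invented for the sketch):

```go
package main

import (
	"context"
	"fmt"
)

// A pared-down stand-in for the carstore change above: callers take the
// interface, so any session implementation can back it.
type BlockStorage interface {
	Put(ctx context.Context, key string, b []byte) error
	Get(ctx context.Context, key string) ([]byte, error)
}

type memSession struct{ blks map[string][]byte }

func (s *memSession) Put(_ context.Context, k string, b []byte) error {
	s.blks[k] = b
	return nil
}

func (s *memSession) Get(_ context.Context, k string) ([]byte, error) {
	b, ok := s.blks[k]
	if !ok {
		return nil, fmt.Errorf("not found: %s", k)
	}
	return b, nil
}

// compile-time conformance check, the same idiom the real code uses with
// `var _ blockstore.Blockstore = (*DeltaSession)(nil)`
var _ BlockStorage = (*memSession)(nil)

// constructor returns the interface, as NewDeltaSession now does
func newDeltaSession() BlockStorage {
	return &memSession{blks: make(map[string][]byte)}
}

func main() {
	ctx := context.Background()
	ds := newDeltaSession()
	_ = ds.Put(ctx, "cid1", []byte("block"))
	b, _ := ds.Get(ctx, "cid1")
	fmt.Printf("%s\n", b)
}
```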
+5 -4
carstore/nonarchive.go
··· 4 4 "bytes" 5 5 "context" 6 6 "fmt" 7 - ipld "github.com/ipfs/go-ipld-format" 8 7 "io" 9 8 "log/slog" 10 9 "sync" 10 + 11 + ipld "github.com/ipfs/go-ipld-format" 11 12 12 13 "github.com/bluesky-social/indigo/models" 13 14 blockformat "github.com/ipfs/go-block-format" ··· 126 127 127 128 var commitRefZero = commitRefInfo{} 128 129 129 - func (cs *NonArchivalCarstore) NewDeltaSession(ctx context.Context, user models.Uid, since *string) (*DeltaSession, error) { 130 + func (cs *NonArchivalCarstore) NewDeltaSession(ctx context.Context, user models.Uid, since *string) (BlockStorage, error) { 130 131 ctx, span := otel.Tracer("carstore").Start(ctx, "NewSession") 131 132 defer span.End() 132 133 ··· 160 161 }, nil 161 162 } 162 163 163 - func (cs *NonArchivalCarstore) ReadOnlySession(user models.Uid) (*DeltaSession, error) { 164 + func (cs *NonArchivalCarstore) ReadOnlySession(user models.Uid) (BlockStorage, error) { 164 165 return &DeltaSession{ 165 166 base: &userView{ 166 167 user: user, ··· 179 180 return fmt.Errorf("not supported in non-archival mode") 180 181 } 181 182 182 - func (cs *NonArchivalCarstore) ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, *DeltaSession, error) { 183 + func (cs *NonArchivalCarstore) ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, BlockStorage, error) { 183 184 ctx, span := otel.Tracer("carstore").Start(ctx, "ImportSlice") 184 185 defer span.End() 185 186
+2 -1
carstore/repo_test.go
··· 16 16 appbsky "github.com/bluesky-social/indigo/api/bsky" 17 17 "github.com/bluesky-social/indigo/repo" 18 18 "github.com/bluesky-social/indigo/util" 19 + 19 20 //sqlbs "github.com/ipfs/go-bs-sqlite3" 20 21 "github.com/ipfs/go-cid" 21 22 flatfs "github.com/ipfs/go-ds-flatfs" ··· 521 522 522 523 var cids []cid.Cid 523 524 var revs []string 524 - for _, ds := range []*DeltaSession{ds1, ds2, ds3} { 525 + for _, ds := range []BlockStorage{ds1, ds2, ds3} { 525 526 ncid, rev, err := setupRepo(ctx, ds, true) 526 527 if err != nil { 527 528 t.Fatal(err)
+5 -4
carstore/sqlite_store.go
··· 6 6 "database/sql" 7 7 "errors" 8 8 "fmt" 9 - "go.opentelemetry.io/otel/attribute" 10 9 "io" 11 10 "log/slog" 12 11 "os" 13 12 "path/filepath" 13 + 14 + "go.opentelemetry.io/otel/attribute" 14 15 15 16 "github.com/bluesky-social/indigo/models" 16 17 blockformat "github.com/ipfs/go-block-format" ··· 238 239 return lastShard.Rev, nil 239 240 } 240 241 241 - func (sqs *SQLiteStore) ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, *DeltaSession, error) { 242 + func (sqs *SQLiteStore) ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, BlockStorage, error) { 242 243 // TODO: same as FileCarStore, re-unify 243 244 ctx, span := otel.Tracer("carstore").Start(ctx, "ImportSlice") 244 245 defer span.End() ··· 279 280 280 281 var zeroShard CarShard 281 282 282 - func (sqs *SQLiteStore) NewDeltaSession(ctx context.Context, user models.Uid, since *string) (*DeltaSession, error) { 283 + func (sqs *SQLiteStore) NewDeltaSession(ctx context.Context, user models.Uid, since *string) (BlockStorage, error) { 283 284 ctx, span := otel.Tracer("carstore").Start(ctx, "NewSession") 284 285 defer span.End() 285 286 ··· 312 313 }, nil 313 314 } 314 315 315 - func (sqs *SQLiteStore) ReadOnlySession(user models.Uid) (*DeltaSession, error) { 316 + func (sqs *SQLiteStore) ReadOnlySession(user models.Uid) (BlockStorage, error) { 316 317 return &DeltaSession{ 317 318 base: &sqliteUserView{ 318 319 uid: user,
+51
cmd/archit/Dockerfile
··· 1 + # Run this dockerfile from the top level of the indigo git repository like:
2 + #
3 + # podman build -f ./cmd/archit/Dockerfile -t archit .
4 + 
5 + ### Compile stage
6 + FROM golang:1.24-alpine3.22 AS build-env
7 + RUN apk add --no-cache build-base make git
8 + 
9 + ADD . /dockerbuild
10 + WORKDIR /dockerbuild
11 + 
12 + # the timetzdata tag embeds timezone data, needed for alpine builds
13 + ENV GOEXPERIMENT=loopvar
14 + RUN GIT_VERSION=$(git describe --tags --long --always) && \
15 +     echo "replace github.com/gocql/gocql => github.com/scylladb/gocql v1.14.4" >> go.mod && \
16 +     go mod tidy && \
17 +     go build -tags timetzdata,scylla -o /archit ./cmd/archit
18 + 
19 + ### Build Frontend stage
20 + FROM node:18-alpine as web-builder
21 + 
22 + WORKDIR /app
23 + 
24 + COPY ts/bgs-dash /app/
25 + 
26 + RUN yarn install --frozen-lockfile
27 + 
28 + RUN yarn build
29 + 
30 + ### Run stage
31 + FROM alpine:3.22
32 + 
33 + RUN apk add --no-cache --update dumb-init ca-certificates runit
34 + ENTRYPOINT ["dumb-init", "--"]
35 + 
36 + WORKDIR /
37 + RUN mkdir -p data/archiver
38 + COPY --from=build-env /archit /
39 + COPY --from=web-builder /app/dist/ public/
40 + 
41 + # small things to make golang binaries work well under alpine
42 + ENV GODEBUG=netdns=go
43 + ENV TZ=Etc/UTC
44 + 
45 + EXPOSE 2970
46 + 
47 + CMD ["/archit"]
48 + 
49 + LABEL org.opencontainers.image.source=https://github.com/bluesky-social/indigo
50 + LABEL org.opencontainers.image.description="ATP repo archiver"
51 + LABEL org.opencontainers.image.licenses=MIT
+33
cmd/archit/crawl_pds.sh
··· 1 + #!/usr/bin/env bash
2 + 
3 + set -e # fail on error
4 + set -u # fail if variable not set in substitution
5 + set -o pipefail # fail if part of a '|' command fails
6 + 
7 + if test -z "${RELAY_ADMIN_KEY:-}"; then
8 +     echo "RELAY_ADMIN_KEY secret is not defined"
9 +     exit 1
10 + fi
11 + 
12 + if test -z "${RELAY_HOST:-}"; then
13 +     echo "RELAY_HOST config not defined"
14 +     exit 1
15 + fi
16 + 
17 + if test -z "${1:-}"; then
18 +     echo "expected PDS hostname as an argument"
19 +     exit 1
20 + fi
21 + 
22 + echo "requestCrawl $1"
23 + http --quiet --ignore-stdin post "https://${RELAY_HOST}/admin/pds/requestCrawl" Authorization:"Bearer ${RELAY_ADMIN_KEY}" \
24 +     hostname=$1
25 + 
26 + echo "changeLimits $1"
27 + http --quiet --ignore-stdin post "https://${RELAY_HOST}/admin/pds/changeLimits" Authorization:"Bearer ${RELAY_ADMIN_KEY}" \
28 +     per_second:=100 \
29 +     per_hour:=1000000 \
30 +     per_day:=1000000 \
31 +     crawl_rate:=10 \
32 +     repo_limit:=1000000 \
33 +     host=$1
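For callers that would rather not shell out to httpie, the same two admin calls look like this in Go (the endpoint paths, JSON keys, and env var names are taken from the script above; everything else is an illustrative sketch):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

// postJSON makes one authenticated admin POST; mirrors what crawl_pds.sh does
// with httpie, using RELAY_HOST and RELAY_ADMIN_KEY from the environment.
func postJSON(path string, body any) error {
	buf, err := json.Marshal(body)
	if err != nil {
		return err
	}
	req, err := http.NewRequest(http.MethodPost, "https://"+os.Getenv("RELAY_HOST")+path, bytes.NewReader(buf))
	if err != nil {
		return err
	}
	req.Header.Set("Authorization", "Bearer "+os.Getenv("RELAY_ADMIN_KEY"))
	req.Header.Set("Content-Type", "application/json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("%s: unexpected status %s", path, resp.Status)
	}
	return nil
}

func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: crawlpds <pds-hostname>")
		os.Exit(2)
	}
	host := os.Args[1]
	if err := postJSON("/admin/pds/requestCrawl", map[string]any{"hostname": host}); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	if err := postJSON("/admin/pds/changeLimits", map[string]any{
		"host": host, "per_second": 100, "per_hour": 1000000,
		"per_day": 1000000, "crawl_rate": 10, "repo_limit": 1000000,
	}); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```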
+15
cmd/archit/docker-compose.yml
··· 1 + version: "3.8" 2 + services: 3 + postgres: 4 + image: "postgres:14" 5 + ports: 6 + - "5432:5432" 7 + volumes: 8 + - type: bind 9 + source: /mnt/postgres 10 + target: /var/lib/postgresql/data 11 + restart: always 12 + environment: 13 + POSTGRES_USER: bgs 14 + POSTGRES_PASSWORD: 33pAstcHDMszLedQah2EVYNgnxbCP 15 + POSTGRES_DB: bgs
+463
cmd/archit/main.go
··· 1 + package main 2 + 3 + import ( 4 + "context" 5 + "fmt" 6 + "log/slog" 7 + "net/http" 8 + _ "net/http/pprof" 9 + "net/url" 10 + "os" 11 + "os/signal" 12 + "path/filepath" 13 + "strings" 14 + "syscall" 15 + "time" 16 + 17 + archiver "github.com/bluesky-social/indigo/archiver" 18 + "github.com/bluesky-social/indigo/carstore" 19 + "github.com/bluesky-social/indigo/did" 20 + "github.com/bluesky-social/indigo/events" 21 + "github.com/bluesky-social/indigo/handles" 22 + "github.com/bluesky-social/indigo/indexer" 23 + "github.com/bluesky-social/indigo/plc" 24 + "github.com/bluesky-social/indigo/repomgr" 25 + "github.com/bluesky-social/indigo/util" 26 + "github.com/bluesky-social/indigo/util/cliutil" 27 + "github.com/bluesky-social/indigo/xrpc" 28 + 29 + _ "github.com/joho/godotenv/autoload" 30 + _ "go.uber.org/automaxprocs" 31 + 32 + "github.com/carlmjohnson/versioninfo" 33 + "github.com/urfave/cli/v2" 34 + "go.opentelemetry.io/otel" 35 + "go.opentelemetry.io/otel/attribute" 36 + "go.opentelemetry.io/otel/exporters/jaeger" 37 + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" 38 + "go.opentelemetry.io/otel/sdk/resource" 39 + tracesdk "go.opentelemetry.io/otel/sdk/trace" 40 + semconv "go.opentelemetry.io/otel/semconv/v1.4.0" 41 + ) 42 + 43 + var log = slog.Default().With("system", "archiver") 44 + 45 + func init() { 46 + // control log level using, eg, GOLOG_LOG_LEVEL=debug 47 + //logging.SetAllLoggers(logging.LevelDebug) 48 + } 49 + 50 + func main() { 51 + if err := run(os.Args); err != nil { 52 + slog.Error(err.Error()) 53 + os.Exit(1) 54 + } 55 + } 56 + 57 + func run(args []string) error { 58 + 59 + app := cli.App{ 60 + Name: "archiver", 61 + Usage: "atproto repo archiver daemon", 62 + Version: versioninfo.Short(), 63 + } 64 + 65 + app.Flags = []cli.Flag{ 66 + &cli.BoolFlag{ 67 + Name: "jaeger", 68 + }, 69 + &cli.StringFlag{ 70 + Name: "db-url", 71 + Usage: "database connection string for database", 72 + Value: "sqlite://./data/archiver/db.sqlite", 73 + EnvVars: []string{"DATABASE_URL"}, 74 + }, 75 + &cli.BoolFlag{ 76 + Name: "db-tracing", 77 + }, 78 + &cli.StringFlag{ 79 + Name: "data-dir", 80 + Usage: "path of directory for CAR files and other data", 81 + Value: "data/archiver", 82 + EnvVars: []string{"ARCHIVER_DATA_DIR", "DATA_DIR"}, 83 + }, 84 + &cli.StringFlag{ 85 + Name: "plc-host", 86 + Usage: "method, hostname, and port of PLC registry", 87 + Value: "https://plc.directory", 88 + EnvVars: []string{"ATP_PLC_HOST"}, 89 + }, 90 + &cli.BoolFlag{ 91 + Name: "crawl-insecure-ws", 92 + Usage: "when connecting to PDS instances, use ws:// instead of wss://", 93 + }, 94 + &cli.StringFlag{ 95 + Name: "api-listen", 96 + Value: ":2970", 97 + }, 98 + &cli.StringFlag{ 99 + Name: "metrics-listen", 100 + Value: ":2971", 101 + EnvVars: []string{"ARCHIVER_METRICS_LISTEN", "BGS_METRICS_LISTEN"}, 102 + }, 103 + &cli.StringFlag{ 104 + Name: "admin-key", 105 + EnvVars: []string{"ARCHIVER_ADMIN_KEY", "BGS_ADMIN_KEY"}, 106 + }, 107 + &cli.StringSliceFlag{ 108 + Name: "handle-resolver-hosts", 109 + EnvVars: []string{"HANDLE_RESOLVER_HOSTS"}, 110 + }, 111 + &cli.IntFlag{ 112 + Name: "max-db-connections", 113 + EnvVars: []string{"MAX_METADB_CONNECTIONS"}, 114 + Value: 40, 115 + }, 116 + &cli.DurationFlag{ 117 + Name: "compact-interval", 118 + EnvVars: []string{"ARCHIVER_COMPACT_INTERVAL", "BGS_COMPACT_INTERVAL"}, 119 + Value: 4 * time.Hour, 120 + Usage: "interval between compaction runs, set to 0 to disable scheduled compaction", 121 + }, 122 + &cli.StringFlag{ 123 + Name: "resolve-address", 
124 + EnvVars: []string{"RESOLVE_ADDRESS"}, 125 + Value: "1.1.1.1:53", 126 + }, 127 + &cli.BoolFlag{ 128 + Name: "force-dns-udp", 129 + EnvVars: []string{"FORCE_DNS_UDP"}, 130 + }, 131 + &cli.IntFlag{ 132 + Name: "max-fetch-concurrency", 133 + Value: 100, 134 + EnvVars: []string{"MAX_FETCH_CONCURRENCY"}, 135 + }, 136 + &cli.StringFlag{ 137 + Name: "env", 138 + Value: "dev", 139 + EnvVars: []string{"ENVIRONMENT"}, 140 + Usage: "declared hosting environment (prod, qa, etc); used in metrics", 141 + }, 142 + &cli.StringFlag{ 143 + Name: "otel-exporter-otlp-endpoint", 144 + EnvVars: []string{"OTEL_EXPORTER_OTLP_ENDPOINT"}, 145 + }, 146 + &cli.StringFlag{ 147 + Name: "bsky-social-rate-limit-skip", 148 + EnvVars: []string{"BSKY_SOCIAL_RATE_LIMIT_SKIP"}, 149 + Usage: "ratelimit bypass secret token for *.bsky.social domains", 150 + }, 151 + &cli.IntFlag{ 152 + Name: "default-repo-limit", 153 + Value: 100, 154 + EnvVars: []string{"ARCHIVER_DEFAULT_REPO_LIMIT"}, 155 + }, 156 + &cli.IntFlag{ 157 + Name: "concurrency-per-pds", 158 + EnvVars: []string{"ARCHIVER_CONCURRENCY_PER_PDS"}, 159 + Value: 100, 160 + }, 161 + &cli.IntFlag{ 162 + Name: "max-queue-per-pds", 163 + EnvVars: []string{"ARCHIVER_MAX_QUEUE_PER_PDS"}, 164 + Value: 1_000, 165 + }, 166 + &cli.IntFlag{ 167 + Name: "did-cache-size", 168 + EnvVars: []string{"ARCHIVER_DID_CACHE_SIZE"}, 169 + Value: 5_000_000, 170 + }, 171 + &cli.StringSliceFlag{ 172 + Name: "did-memcached", 173 + EnvVars: []string{"ARCHIVER_DID_MEMCACHED"}, 174 + }, 175 + &cli.IntFlag{ 176 + Name: "num-compaction-workers", 177 + EnvVars: []string{"ARCHIVER_NUM_COMPACTION_WORKERS"}, 178 + Value: 2, 179 + }, 180 + &cli.StringSliceFlag{ 181 + Name: "carstore-shard-dirs", 182 + Usage: "specify list of shard directories for carstore storage, overrides default storage within datadir", 183 + EnvVars: []string{"ARCHIVER_CARSTORE_SHARD_DIRS"}, 184 + }, 185 + } 186 + 187 + app.Action = runBigsky 188 + return app.Run(os.Args) 189 + } 190 + 191 + func setupOTEL(cctx *cli.Context) error { 192 + 193 + env := cctx.String("env") 194 + if env == "" { 195 + env = "dev" 196 + } 197 + if cctx.Bool("jaeger") { 198 + jaegerUrl := "http://localhost:14268/api/traces" 199 + exp, err := jaeger.New(jaeger.WithCollectorEndpoint(jaeger.WithEndpoint(jaegerUrl))) 200 + if err != nil { 201 + return err 202 + } 203 + tp := tracesdk.NewTracerProvider( 204 + // Always be sure to batch in production. 205 + tracesdk.WithBatcher(exp), 206 + // Record information about this application in a Resource. 
207 +             tracesdk.WithResource(resource.NewWithAttributes(
208 +                 semconv.SchemaURL,
209 +                 semconv.ServiceNameKey.String("arc"),
210 +                 attribute.String("env", env),         // DataDog
211 +                 attribute.String("environment", env), // Others
212 +                 attribute.Int64("ID", 1),
213 +             )),
214 +         )
215 + 
216 +         otel.SetTracerProvider(tp)
217 +     }
218 + 
219 +     // Enable OTLP HTTP exporter
220 +     // For relevant environment variables:
221 +     // https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace#readme-environment-variables
222 +     // At a minimum, you need to set
223 +     // OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
224 +     if ep := cctx.String("otel-exporter-otlp-endpoint"); ep != "" {
225 +         slog.Info("setting up trace exporter", "endpoint", ep)
226 +         ctx, cancel := context.WithCancel(context.Background())
227 +         defer cancel()
228 + 
229 +         exp, err := otlptracehttp.New(ctx)
230 +         if err != nil {
231 +             slog.Error("failed to create trace exporter", "error", err)
232 +             os.Exit(1)
233 +         }
234 +         defer func() {
235 +             ctx, cancel := context.WithTimeout(context.Background(), time.Second)
236 +             defer cancel()
237 +             if err := exp.Shutdown(ctx); err != nil {
238 +                 slog.Error("failed to shutdown trace exporter", "error", err)
239 +             }
240 +         }()
241 + 
242 +         tp := tracesdk.NewTracerProvider(
243 +             tracesdk.WithBatcher(exp),
244 +             tracesdk.WithResource(resource.NewWithAttributes(
245 +                 semconv.SchemaURL,
246 +                 semconv.ServiceNameKey.String("archiver"),
247 +                 attribute.String("env", env),         // DataDog
248 +                 attribute.String("environment", env), // Others
249 +                 attribute.Int64("ID", 1),
250 +             )),
251 +         )
252 +         otel.SetTracerProvider(tp)
253 +     }
254 + 
255 +     return nil
256 + }
257 + 
258 + func runBigsky(cctx *cli.Context) error {
259 +     // Trap SIGINT and SIGTERM to trigger a shutdown.
260 +     signals := make(chan os.Signal, 1)
261 +     signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
262 + 
263 +     _, _, err := cliutil.SetupSlog(cliutil.LogOptions{})
264 +     if err != nil {
265 +         return err
266 +     }
267 + 
268 +     // start observability/tracing (OTEL and jaeger)
269 +     if err := setupOTEL(cctx); err != nil {
270 +         return err
271 +     }
272 + 
273 +     // ensure data directory exists; won't error if it does
274 +     datadir := cctx.String("data-dir")
275 +     csdir := filepath.Join(datadir, "carstore")
276 +     if err := os.MkdirAll(datadir, os.ModePerm); err != nil {
277 +         return err
278 +     }
279 + 
280 +     dburl := cctx.String("db-url")
281 +     slog.Info("setting up main database", "url", dburl)
282 +     db, err := cliutil.SetupDatabase(dburl, cctx.Int("max-db-connections"))
283 +     if err != nil {
284 +         return err
285 +     }
286 + 
287 +     // make standard FileCarStore
288 +     csdirs := []string{csdir}
289 +     if paramDirs := cctx.StringSlice("carstore-shard-dirs"); len(paramDirs) > 0 {
290 +         csdirs = paramDirs
291 +     }
292 + 
293 +     for _, csd := range csdirs {
294 +         if err := os.MkdirAll(filepath.Dir(csd), os.ModePerm); err != nil {
295 +             return err
296 +         }
297 +     }
298 + 
299 +     cstore, err := carstore.NewCarStore(db, csdirs)
300 +     if err != nil {
301 +         return err
302 +     }
303 + 
304 +     // DID RESOLUTION
305 +     // 1. the outside world, PLCServer or Web
306 +     // 2. (maybe memcached)
307 +     // 3. in-process cache
308 +     var cachedidr did.Resolver
309 +     {
310 +         mr := did.NewMultiResolver()
311 + 
312 +         didr := &plc.PLCServer{Host: cctx.String("plc-host")}
313 +         mr.AddHandler("plc", didr)
314 + 
315 +         webr := did.WebResolver{}
316 +         if cctx.Bool("crawl-insecure-ws") {
317 +             webr.Insecure = true
318 +         }
319 +         mr.AddHandler("web", &webr)
320 + 
321 +         var prevResolver did.Resolver
322 +         memcachedServers := cctx.StringSlice("did-memcached")
323 +         if len(memcachedServers) > 0 {
324 +             prevResolver = plc.NewMemcachedDidResolver(mr, time.Hour*24, memcachedServers)
325 +         } else {
326 +             prevResolver = mr
327 +         }
328 + 
329 +         cachedidr = plc.NewCachingDidResolver(prevResolver, time.Hour*24, cctx.Int("did-cache-size"))
330 +     }
331 + 
332 +     kmgr := indexer.NewKeyManager(cachedidr, nil)
333 + 
334 +     repoman := repomgr.NewRepoManager(cstore, kmgr)
335 + 
336 +     rf := archiver.NewRepoFetcher(db, repoman, cctx.Int("max-fetch-concurrency"))
337 + 
338 +     nullfunc := func(ctx context.Context, evt *events.XRPCStreamEvent) error {
339 +         return nil
340 +     }
341 + 
342 +     ix, err := archiver.NewIndexer(db, nullfunc, cachedidr, rf, true)
343 +     if err != nil {
344 +         return err
345 +     }
346 +     defer ix.Shutdown()
347 + 
348 +     rlskip := cctx.String("bsky-social-rate-limit-skip")
349 +     ix.ApplyPDSClientSettings = func(c *xrpc.Client) {
350 +         if c.Client == nil {
351 +             c.Client = util.RobustHTTPClient()
352 +         }
353 +         if strings.HasSuffix(c.Host, ".bsky.network") {
354 +             c.Client.Timeout = time.Minute * 30
355 +             if rlskip != "" {
356 +                 c.Headers = map[string]string{
357 +                     "x-ratelimit-bypass": rlskip,
358 +                 }
359 +             }
360 +         } else {
361 +             // Generic PDS timeout
362 +             c.Client.Timeout = time.Minute * 1
363 +         }
364 +     }
365 +     rf.ApplyPDSClientSettings = ix.ApplyPDSClientSettings
366 + 
367 +     repoman.SetEventHandler(func(ctx context.Context, evt *repomgr.RepoEvent) {
368 +         if err := ix.HandleRepoEvent(ctx, evt); err != nil {
369 +             slog.Error("failed to handle repo event", "err", err)
370 +         }
371 +     }, false)
372 + 
373 +     prodHR, err := handles.NewProdHandleResolver(100_000, cctx.String("resolve-address"), cctx.Bool("force-dns-udp"))
374 +     if err != nil {
375 +         return fmt.Errorf("failed to set up handle resolver: %w", err)
376 +     }
377 +     if rlskip != "" {
378 +         prodHR.ReqMod = func(req *http.Request, host string) error {
379 +             if strings.HasSuffix(host, ".bsky.social") {
380 +                 req.Header.Set("x-ratelimit-bypass", rlskip)
381 +             }
382 +             return nil
383 +         }
384 +     }
385 + 
386 +     var hr handles.HandleResolver = prodHR
387 +     if cctx.StringSlice("handle-resolver-hosts") != nil {
388 +         hr = &handles.TestHandleResolver{
389 +             TrialHosts: cctx.StringSlice("handle-resolver-hosts"),
390 +         }
391 +     }
392 + 
393 +     slog.Info("constructing archiver")
394 +     archiverConfig := archiver.DefaultArchiverConfig()
395 +     archiverConfig.SSL = !cctx.Bool("crawl-insecure-ws")
396 +     archiverConfig.CompactInterval = cctx.Duration("compact-interval")
397 +     archiverConfig.ConcurrencyPerPDS = cctx.Int64("concurrency-per-pds")
398 +     archiverConfig.MaxQueuePerPDS = cctx.Int64("max-queue-per-pds")
399 +     archiverConfig.DefaultRepoLimit = cctx.Int64("default-repo-limit")
400 +     archiverConfig.NumCompactionWorkers = cctx.Int("num-compaction-workers")
401 +     nextCrawlers := cctx.StringSlice("next-crawler") // FIXME: no "next-crawler" flag is registered above, so this is currently always empty
402 +     if len(nextCrawlers) != 0 {
403 +         nextCrawlerUrls := make([]*url.URL, len(nextCrawlers))
404 +         for i, tu := range nextCrawlers {
405 +             var err error
406 +             nextCrawlerUrls[i], err = url.Parse(tu)
407 +             if err != nil {
408 +                 return fmt.Errorf("failed to parse next-crawler url: %w", err)
409 +             }
410 +             slog.Info("configuring relay for requestCrawl", "host", nextCrawlerUrls[i])
411 +         }
412 +         archiverConfig.NextCrawlers = nextCrawlerUrls
413 +     }
414 + 
415 +     arc, err := archiver.NewArchiver(db, ix, repoman, cachedidr, rf, hr, archiverConfig)
416 +     if err != nil {
417 +         return err
418 +     }
419 + 
420 +     if tok := cctx.String("admin-key"); tok != "" {
421 +         if err := arc.CreateAdminToken(tok); err != nil {
422 +             return fmt.Errorf("failed to set up admin token: %w", err)
423 +         }
424 +     }
425 + 
426 +     // set up metrics endpoint
427 +     go func() {
428 +         if err := arc.StartMetrics(cctx.String("metrics-listen")); err != nil {
429 +             log.Error("failed to start metrics endpoint", "err", err)
430 +             os.Exit(1)
431 +         }
432 +     }()
433 + 
434 +     arcErr := make(chan error, 1)
435 + 
436 +     go func() {
437 +         err := arc.Start(cctx.String("api-listen"))
438 +         arcErr <- err
439 +     }()
440 + 
441 +     slog.Info("startup complete")
442 +     select {
443 +     case <-signals:
444 +         log.Info("received shutdown signal")
445 +         errs := arc.Shutdown()
446 +         for err := range errs {
447 +             slog.Error("error during BGS shutdown", "err", err)
448 +         }
449 +     case err := <-arcErr:
450 +         if err != nil {
451 +             slog.Error("error during BGS startup", "err", err)
452 +         }
453 +         log.Info("shutting down")
454 +         errs := arc.Shutdown()
455 +         for err := range errs {
456 +             slog.Error("error during BGS shutdown", "err", err)
457 +         }
458 +     }
459 + 
460 +     log.Info("shutdown complete")
461 + 
462 +     return nil
463 + }
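The DID resolution comment above describes a three-layer stack: the network resolvers (PLC and did:web) behind an optional shared memcached layer, behind an in-process cache. Reduced to its shape, the layering is just decorators over one interface (the `Resolver` and `cache` types below are invented for the sketch; the real ones are `did.MultiResolver`, `plc.NewMemcachedDidResolver`, and `plc.NewCachingDidResolver`):

```go
package main

import (
	"context"
	"fmt"
)

// Resolver is the one interface every layer satisfies.
type Resolver interface {
	Resolve(ctx context.Context, did string) (string, error)
}

// network stands in for the PLC/Web multiresolver at the bottom of the stack.
type network struct{}

func (network) Resolve(_ context.Context, did string) (string, error) {
	return "doc-for-" + did, nil // pretend we hit plc.directory
}

// cache wraps any inner Resolver and short-circuits repeat lookups.
type cache struct {
	inner Resolver
	seen  map[string]string
}

func (c *cache) Resolve(ctx context.Context, did string) (string, error) {
	if doc, ok := c.seen[did]; ok {
		return doc, nil // cache hit, no network
	}
	doc, err := c.inner.Resolve(ctx, did)
	if err != nil {
		return "", err
	}
	c.seen[did] = doc
	return doc, nil
}

func main() {
	var r Resolver = network{}
	// optional shared layer (memcached in main.go), then the in-process layer
	r = &cache{inner: r, seen: map[string]string{}}
	r = &cache{inner: r, seen: map[string]string{}}
	doc, _ := r.Resolve(context.Background(), "did:plc:example")
	fmt.Println(doc)
}
```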
+115
cmd/archit/resync_pdses.py
··· 1 + #!/usr/bin/env python3
2 + #
3 + # pip install requests
4 + #
5 + # python3 resync_pdses.py --admin-key hunter2 --url http://myrelay:2970 host_per_line.txt
6 + 
7 + import json
8 + import sys
9 + import urllib.parse
10 + 
11 + import requests
12 + 
13 + 
14 + # pds limits for POST /admin/pds/changeLimits
15 + # {"host":"", "per_second": int, "per_hour": int, "per_day": int, "crawl_rate": int, "repo_limit": int}
16 + 
17 + limitsKeys = ('per_second', 'per_hour', 'per_day', 'crawl_rate', 'repo_limit')
18 + 
19 + def checkLimits(limits):
20 +     for k in limits.keys():
21 +         if k not in limitsKeys:
22 +             raise Exception(f"unknown pds rate limits key {k!r}")
23 +     return True
24 + 
25 + class relay:
26 +     def __init__(self, rooturl, headers=None, session=None):
27 +         "rooturl string, headers dict or None, session requests.Session() or None"
28 +         self.rooturl = rooturl
29 +         self.headers = headers or dict()
30 +         self.session = session or requests.Session()
31 + 
32 +     def resync(self, host):
33 +         "host string"
34 +         url = urllib.parse.urljoin(self.rooturl, '/admin/pds/resync')
35 +         response = self.session.post(url, params={"host": host}, headers=self.headers, data='')
36 +         if response.status_code != 200:
37 +             sys.stderr.write(f"{url}?host={host} : ({response.status_code}) ({response.text!r})\n")
38 +         else:
39 +             sys.stderr.write(f"{url}?host={host} : OK\n")
40 + 
41 +     def crawlAndSetLimits(self, host, limits):
42 +         "host string, limits dict or None"
43 +         pheaders = dict(self.headers)
44 +         pheaders['Content-Type'] = 'application/json'
45 +         url = urllib.parse.urljoin(self.rooturl, '/admin/pds/requestCrawl')
46 +         response = self.session.post(url, headers=pheaders, data=json.dumps({"hostname": host}))
47 +         if response.status_code != 200:
48 +             sys.stderr.write(f"{url} {host} : {response.status_code} {response.text!r}\n")
49 +             return
50 +         if limits is None:
51 +             sys.stderr.write(f"requestCrawl {host} OK\n"); return
52 +         url = urllib.parse.urljoin(self.rooturl, '/admin/pds/changeLimits')
53 +         plimits = dict(limits)
54 +         plimits["host"] = host
55 +         response = self.session.post(url, headers=pheaders, data=json.dumps(plimits))
56 +         if response.status_code != 200:
57 +             sys.stderr.write(f"{url} {host} : {response.status_code} {response.text!r}\n")
58 +             return
59 +         sys.stderr.write(f"requestCrawl + changeLimits {host} OK\n")
60 + 
61 + def main():
62 +     import argparse
63 +     ap = argparse.ArgumentParser()
64 +     ap.add_argument('input', default='-', help='host per line text file to read, - for stdin')
65 +     ap.add_argument('--admin-key', default=None, help='relay auth bearer token', required=True)
66 +     ap.add_argument('--url', default=None, help='base url to POST /admin/pds/resync', required=True)
67 +     ap.add_argument('--resync', default=False, action='store_true', help='resync selected PDSes')
68 +     ap.add_argument('--limits', default=None, help='json pds rate limits')
69 +     ap.add_argument('--crawl', default=False, action='store_true', help='crawl & set limits')
70 +     args = ap.parse_args()
71 + 
72 +     headers = {'Authorization': 'Bearer ' + args.admin_key}
73 + 
74 +     relaySession = relay(args.url, headers)
75 + 
76 +     #url = urllib.parse.urljoin(args.url, '/admin/pds/resync')
77 + 
78 +     #sess = requests.Session()
79 +     if args.crawl and args.resync:
80 +         sys.stderr.write("should only specify one of --resync --crawl\n")
81 +         sys.exit(1)
82 +     if (not args.crawl) and (not args.resync):
83 +         sys.stderr.write("should specify one of --resync --crawl\n")
84 +         sys.exit(1)
85 + 
86 +     limits = None
87 +     if args.limits:
88 +         limits = json.loads(args.limits)
89 +         checkLimits(limits)
90 + 
91 +     if args.input == '-':
92 +         fin = sys.stdin
93 +     else:
94 +         fin = open(args.input, 'rt')
95 +     for line in fin:
96 +         if not line:
97 +             continue
98 +         line = line.strip()
99 +         if not line:
100 +             continue
101 +         if line[0] == '#':
102 +             continue
103 +         host = line
104 +         if args.crawl:
105 +             relaySession.crawlAndSetLimits(host, limits)
106 +         elif args.resync:
107 +             relaySession.resync(host)
108 +     # response = sess.post(url, params={"host": line}, headers=headers)
109 +     # if response.status_code != 200:
110 +     #     sys.stderr.write(f"{url}?host={line} : ({response.status_code}) ({response.text!r})\n")
111 +     # else:
112 +     #     sys.stderr.write(f"{url}?host={line} : OK\n")
113 + 
114 + if __name__ == '__main__':
115 +     main()
+24
cmd/archit/sync_pds.sh
··· 1 + #!/usr/bin/env bash
2 + 
3 + set -e # fail on error
4 + set -u # fail if variable not set in substitution
5 + set -o pipefail # fail if part of a '|' command fails
6 + 
7 + if test -z "${RELAY_ADMIN_KEY:-}"; then
8 +     echo "RELAY_ADMIN_KEY secret is not defined"
9 +     exit 1
10 + fi
11 + 
12 + if test -z "${RELAY_HOST:-}"; then
13 +     echo "RELAY_HOST config not defined"
14 +     exit 1
15 + fi
16 + 
17 + if test -z "${1:-}"; then
18 +     echo "expected PDS hostname as an argument"
19 +     exit 1
20 + fi
21 + 
22 + echo "POST resync $1"
23 + http --ignore-stdin post "https://${RELAY_HOST}/admin/pds/resync" Authorization:"Bearer ${RELAY_ADMIN_KEY}" \
24 +     host==$1
+24
cmd/archit/sync_status_pds.sh
··· 1 + #!/usr/bin/env bash
2 + 
3 + set -e # fail on error
4 + set -u # fail if variable not set in substitution
5 + set -o pipefail # fail if part of a '|' command fails
6 + 
7 + if test -z "${RELAY_ADMIN_KEY:-}"; then
8 +     echo "RELAY_ADMIN_KEY secret is not defined"
9 +     exit 1
10 + fi
11 + 
12 + if test -z "${RELAY_HOST:-}"; then
13 +     echo "RELAY_HOST config not defined"
14 +     exit 1
15 + fi
16 + 
17 + if test -z "${1:-}"; then
18 +     echo "expected PDS hostname as an argument"
19 +     exit 1
20 + fi
21 + 
22 + echo "GET resync $1"
23 + http --ignore-stdin --pretty all get "https://${RELAY_HOST}/admin/pds/resync" Authorization:"Bearer ${RELAY_ADMIN_KEY}" \
24 +     host==$1
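sync_pds.sh kicks a resync off with a POST and sync_status_pds.sh polls the same path with a GET, both passing `host` as a query parameter (that is what httpie's `host==$1` means). A sketch of that round-trip in Go (the endpoint and env var names come from the scripts; the `resyncReq` helper is invented):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
)

// resyncReq hits /admin/pds/resync with the given verb: POST starts a resync,
// GET reports its status; `host` travels as a query parameter, auth as a bearer token.
func resyncReq(method, relayHost, adminKey, pdsHost string) (string, error) {
	u := url.URL{Scheme: "https", Host: relayHost, Path: "/admin/pds/resync"}
	u.RawQuery = url.Values{"host": {pdsHost}}.Encode()
	req, err := http.NewRequest(method, u.String(), nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("Authorization", "Bearer "+adminKey)
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("%s %s: %s (%s)", method, u.Path, resp.Status, body)
	}
	return string(body), nil
}

func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: resyncpds <pds-hostname>")
		os.Exit(2)
	}
	relay, key, pds := os.Getenv("RELAY_HOST"), os.Getenv("RELAY_ADMIN_KEY"), os.Args[1]
	if _, err := resyncReq(http.MethodPost, relay, key, pds); err != nil { // kick off
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	status, err := resyncReq(http.MethodGet, relay, key, pds) // poll status
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(status)
}
```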
+41
repostore/README.md
··· 1 + # Carstore
2 + 
3 + Store PDS-like repos for a zillion users, with a more limited set of operations (mainly: firehose in, firehose out).
4 + 
5 + ## [ScyllaStore](scylla.go)
6 + 
7 + Blocks stored in ScyllaDB.
8 + User and PDS metadata stored in gorm (PostgreSQL or sqlite3).
9 + 
10 + ## [FileCarStore](bs.go)
11 + 
12 + Stores 'car slices' from PDS subscribeRepos firehose streams on the filesystem.
13 + Shard metadata stored via gorm (PostgreSQL or sqlite3).
14 + Periodically compacts a user's car slices into fewer, larger car slices.
15 + User and PDS metadata stored in gorm (PostgreSQL or sqlite3).
16 + FileCarStore was the first production carstore and was used through at least 2024-11.
17 + 
18 + ## [SQLiteStore](sqlite_store.go)
19 + 
20 + Experimental/demo.
21 + Blocks stored in a trivial local sqlite3 schema.
22 + Minimal reference implementation from which fancier scalable/performant implementations may be derived.
23 + 
24 + ```sql
25 + CREATE TABLE IF NOT EXISTS blocks (uid int, cid blob, rev varchar, root blob, block blob, PRIMARY KEY(uid,cid))
26 + CREATE INDEX IF NOT EXISTS blocx_by_rev ON blocks (uid, rev DESC)
27 + 
28 + INSERT INTO blocks (uid, cid, rev, root, block) VALUES (?, ?, ?, ?, ?) ON CONFLICT (uid,cid) DO UPDATE SET rev=excluded.rev, root=excluded.root, block=excluded.block
29 + 
30 + SELECT rev, root FROM blocks WHERE uid = ? ORDER BY rev DESC LIMIT 1
31 + 
32 + SELECT cid,rev,root,block FROM blocks WHERE uid = ? AND rev > ? ORDER BY rev DESC
33 + 
34 + DELETE FROM blocks WHERE uid = ?
35 + 
36 + SELECT rev, root FROM blocks WHERE uid = ? AND cid = ? LIMIT 1
37 + 
38 + SELECT block FROM blocks WHERE uid = ? AND cid = ? LIMIT 1
39 + 
40 + SELECT length(block) FROM blocks WHERE uid = ? AND cid = ? LIMIT 1
41 + ```
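Since the whole SQLiteStore schema fits in the statements above, it is easy to poke at directly. A runnable sketch exercising the upsert and the latest-rev query (the `modernc.org/sqlite` driver choice is an assumption; any database/sql sqlite driver works, and the sample uid/cid/rev values are made up):

```go
package main

import (
	"database/sql"
	"fmt"

	_ "modernc.org/sqlite" // driver choice is an assumption; any database/sql sqlite driver works
)

func main() {
	db, err := sql.Open("sqlite", ":memory:")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// schema lifted from the README above
	if _, err := db.Exec(`CREATE TABLE IF NOT EXISTS blocks (uid int, cid blob, rev varchar, root blob, block blob, PRIMARY KEY(uid,cid))`); err != nil {
		panic(err)
	}
	if _, err := db.Exec(`CREATE INDEX IF NOT EXISTS blocx_by_rev ON blocks (uid, rev DESC)`); err != nil {
		panic(err)
	}

	// upsert one block for user 1
	_, err = db.Exec(`INSERT INTO blocks (uid, cid, rev, root, block) VALUES (?, ?, ?, ?, ?)
		ON CONFLICT (uid,cid) DO UPDATE SET rev=excluded.rev, root=excluded.root, block=excluded.block`,
		1, []byte("cid-1"), "3jt5", []byte("root-1"), []byte("raw block bytes"))
	if err != nil {
		panic(err)
	}

	// "what is this user's latest rev/root?" -- the GetUserRepoRev/GetUserRepoHead query
	var rev string
	var root []byte
	if err := db.QueryRow(`SELECT rev, root FROM blocks WHERE uid = ? ORDER BY rev DESC LIMIT 1`, 1).Scan(&rev, &root); err != nil {
		panic(err)
	}
	fmt.Printf("uid=1 rev=%s root=%x\n", rev, root)
}
```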
+1155
repostore/bs.go
··· 1 + package repostore 2 + 3 + import ( 4 + "bufio" 5 + "bytes" 6 + "context" 7 + "fmt" 8 + "io" 9 + "log/slog" 10 + "os" 11 + "path/filepath" 12 + "sort" 13 + "time" 14 + 15 + carstore "github.com/bluesky-social/indigo/carstore" 16 + carstore1 "github.com/bluesky-social/indigo/carstore" 17 + "github.com/bluesky-social/indigo/models" 18 + "github.com/prometheus/client_golang/prometheus" 19 + "github.com/prometheus/client_golang/prometheus/promauto" 20 + 21 + blockformat "github.com/ipfs/go-block-format" 22 + "github.com/ipfs/go-cid" 23 + blockstore "github.com/ipfs/go-ipfs-blockstore" 24 + cbor "github.com/ipfs/go-ipld-cbor" 25 + ipld "github.com/ipfs/go-ipld-format" 26 + "github.com/ipfs/go-libipfs/blocks" 27 + car "github.com/ipld/go-car" 28 + carutil "github.com/ipld/go-car/util" 29 + cbg "github.com/whyrusleeping/cbor-gen" 30 + "go.opentelemetry.io/otel" 31 + "go.opentelemetry.io/otel/attribute" 32 + "gorm.io/gorm" 33 + ) 34 + 35 + var blockGetTotalCounter = promauto.NewCounterVec(prometheus.CounterOpts{ 36 + Name: "carstore2_block_get_total", 37 + Help: "carstore get queries", 38 + }, []string{"usrskip", "cache"}) 39 + 40 + const MaxSliceLength = 2 << 20 41 + 42 + const BigShardThreshold = 2 << 20 43 + 44 + type CarStore interface { 45 + // TODO: not really part of general interface 46 + CompactUserShards(ctx context.Context, user models.Uid, skipBigShards bool) (*carstore1.CompactionStats, error) 47 + // TODO: not really part of general interface 48 + GetCompactionTargets(ctx context.Context, shardCount int) ([]carstore1.CompactionTarget, error) 49 + 50 + GetUserRepoHead(ctx context.Context, user models.Uid) (cid.Cid, error) 51 + GetUserRepoRev(ctx context.Context, user models.Uid) (string, error) 52 + ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, carstore1.BlockStorage, error) 53 + NewDeltaSession(ctx context.Context, user models.Uid, since *string) (carstore1.BlockStorage, error) 54 + ReadOnlySession(user models.Uid) (carstore1.BlockStorage, error) 55 + ReadUserCar(ctx context.Context, user models.Uid, sinceRev string, incremental bool, w io.Writer) error 56 + Stat(ctx context.Context, usr models.Uid) ([]carstore1.UserStat, error) 57 + WipeUserData(ctx context.Context, user models.Uid) error 58 + } 59 + 60 + type FileCarStore struct { 61 + meta *CarStoreGormMeta 62 + rootDirs []string 63 + 64 + lastShardCache lastShardCache 65 + 66 + log *slog.Logger 67 + } 68 + 69 + func NewCarStore(meta *gorm.DB, roots []string) (CarStore, error) { 70 + for _, root := range roots { 71 + if _, err := os.Stat(root); err != nil { 72 + if !os.IsNotExist(err) { 73 + return nil, err 74 + } 75 + 76 + if err := os.Mkdir(root, 0775); err != nil { 77 + return nil, err 78 + } 79 + } 80 + } 81 + if err := meta.AutoMigrate(&CarShard{}, &blockRef{}); err != nil { 82 + return nil, err 83 + } 84 + if err := meta.AutoMigrate(&staleRef{}); err != nil { 85 + return nil, err 86 + } 87 + 88 + gormMeta := &CarStoreGormMeta{meta: meta} 89 + out := &FileCarStore{ 90 + meta: gormMeta, 91 + rootDirs: roots, 92 + lastShardCache: lastShardCache{ 93 + source: gormMeta, 94 + }, 95 + log: slog.Default().With("system", "carstore"), 96 + } 97 + out.lastShardCache.Init() 98 + return out, nil 99 + } 100 + 101 + // wrapper into a block store that keeps track of which user we are working on behalf of 102 + type userView struct { 103 + user models.Uid 104 + 105 + cache map[cid.Cid]blockformat.Block 106 + prefetch bool 107 + } 108 + 109 + var _ blockstore.Blockstore = (*userView)(nil) 
110 + 111 + func (uv *userView) HashOnRead(hor bool) { 112 + //noop 113 + } 114 + 115 + func (uv *userView) Has(ctx context.Context, k cid.Cid) (bool, error) { 116 + _, have := uv.cache[k] 117 + if have { 118 + return have, nil 119 + } 120 + return false, nil 121 + } 122 + 123 + func (uv *userView) Get(ctx context.Context, k cid.Cid) (blockformat.Block, error) { 124 + if !k.Defined() { 125 + return nil, fmt.Errorf("attempted to 'get' undefined cid") 126 + } 127 + if uv.cache != nil { 128 + blk, ok := uv.cache[k] 129 + if ok { 130 + return blk, nil 131 + } 132 + } 133 + 134 + return nil, fmt.Errorf("cant do arbitrary reads from this") 135 + } 136 + 137 + const prefetchThreshold = 512 << 10 138 + 139 + func (uv *userView) prefetchRead(ctx context.Context, k cid.Cid, path string, offset int64) (blockformat.Block, error) { 140 + ctx, span := otel.Tracer("carstore").Start(ctx, "getLastShard") 141 + defer span.End() 142 + 143 + fi, err := os.Open(path) 144 + if err != nil { 145 + return nil, err 146 + } 147 + defer fi.Close() 148 + 149 + st, err := fi.Stat() 150 + if err != nil { 151 + return nil, fmt.Errorf("stat file for prefetch: %w", err) 152 + } 153 + 154 + span.SetAttributes(attribute.Int64("shard_size", st.Size())) 155 + 156 + if st.Size() > prefetchThreshold { 157 + span.SetAttributes(attribute.Bool("no_prefetch", true)) 158 + return doBlockRead(fi, k, offset) 159 + } 160 + 161 + cr, err := car.NewCarReader(fi) 162 + if err != nil { 163 + return nil, err 164 + } 165 + 166 + for { 167 + blk, err := cr.Next() 168 + if err != nil { 169 + if err == io.EOF { 170 + break 171 + } 172 + return nil, err 173 + } 174 + 175 + uv.cache[blk.Cid()] = blk 176 + } 177 + 178 + outblk, ok := uv.cache[k] 179 + if !ok { 180 + return nil, fmt.Errorf("requested block was not found in car slice") 181 + } 182 + 183 + return outblk, nil 184 + } 185 + 186 + func (uv *userView) preloadBlocksFromFile(ctx context.Context, path string) error { 187 + fi, err := os.Open(path) 188 + if err != nil { 189 + return err 190 + } 191 + defer fi.Close() 192 + 193 + cr, err := car.NewCarReader(fi) 194 + if err != nil { 195 + return err 196 + } 197 + 198 + for { 199 + blk, err := cr.Next() 200 + if err != nil { 201 + if err == io.EOF { 202 + break 203 + } 204 + return err 205 + } 206 + 207 + uv.cache[blk.Cid()] = blk 208 + } 209 + 210 + return nil 211 + } 212 + 213 + func (uv *userView) singleRead(ctx context.Context, k cid.Cid, path string, offset int64) (blockformat.Block, error) { 214 + fi, err := os.Open(path) 215 + if err != nil { 216 + return nil, err 217 + } 218 + defer fi.Close() 219 + 220 + return doBlockRead(fi, k, offset) 221 + } 222 + 223 + func doBlockRead(fi *os.File, k cid.Cid, offset int64) (blockformat.Block, error) { 224 + seeked, err := fi.Seek(offset, io.SeekStart) 225 + if err != nil { 226 + return nil, err 227 + } 228 + 229 + if seeked != offset { 230 + return nil, fmt.Errorf("failed to seek to offset (%d != %d)", seeked, offset) 231 + } 232 + 233 + bufr := bufio.NewReader(fi) 234 + rcid, data, err := carutil.ReadNode(bufr) 235 + if err != nil { 236 + return nil, err 237 + } 238 + 239 + if rcid != k { 240 + return nil, fmt.Errorf("mismatch in cid on disk: %s != %s", rcid, k) 241 + } 242 + 243 + return blocks.NewBlockWithCid(data, rcid) 244 + } 245 + 246 + func (uv *userView) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { 247 + return nil, fmt.Errorf("not implemented") 248 + } 249 + 250 + func (uv *userView) Put(ctx context.Context, blk blockformat.Block) error { 251 + return fmt.Errorf("puts not 
supported to car view blockstores") 252 + } 253 + 254 + func (uv *userView) PutMany(ctx context.Context, blks []blockformat.Block) error { 255 + return fmt.Errorf("puts not supported to car view blockstores") 256 + } 257 + 258 + func (uv *userView) DeleteBlock(ctx context.Context, k cid.Cid) error { 259 + return fmt.Errorf("deletes not supported to car view blockstore") 260 + } 261 + 262 + func (uv *userView) GetSize(ctx context.Context, k cid.Cid) (int, error) { 263 + // TODO: maybe block size should be in the database record... 264 + blk, err := uv.Get(ctx, k) 265 + if err != nil { 266 + return 0, err 267 + } 268 + 269 + return len(blk.RawData()), nil 270 + } 271 + 272 + // subset of blockstore.Blockstore that we actually use here 273 + type minBlockstore interface { 274 + Get(ctx context.Context, bcid cid.Cid) (blockformat.Block, error) 275 + Has(ctx context.Context, bcid cid.Cid) (bool, error) 276 + GetSize(ctx context.Context, bcid cid.Cid) (int, error) 277 + } 278 + 279 + type DeltaSession struct { 280 + blks map[cid.Cid]blockformat.Block 281 + rmcids map[cid.Cid]bool 282 + base minBlockstore 283 + user models.Uid 284 + baseCid cid.Cid 285 + seq int 286 + readonly bool 287 + cs shardWriter 288 + lastRev string 289 + } 290 + 291 + func (cs *FileCarStore) checkLastShardCache(user models.Uid) *CarShard { 292 + return cs.lastShardCache.check(user) 293 + } 294 + 295 + func (cs *FileCarStore) removeLastShardCache(user models.Uid) { 296 + cs.lastShardCache.remove(user) 297 + } 298 + 299 + func (cs *FileCarStore) putLastShardCache(ls *CarShard) { 300 + cs.lastShardCache.put(ls) 301 + } 302 + 303 + func (cs *FileCarStore) getLastShard(ctx context.Context, user models.Uid) (*CarShard, error) { 304 + return cs.lastShardCache.get(ctx, user) 305 + } 306 + 307 + func (cs *FileCarStore) NewDeltaSession(ctx context.Context, user models.Uid, since *string) (carstore1.BlockStorage, error) { 308 + ctx, span := otel.Tracer("carstore2").Start(ctx, "NewSession") 309 + defer span.End() 310 + 311 + // TODO: ensure that we don't write updates on top of the wrong head 312 + // this needs to be a compare and swap type operation 313 + lastShard, err := cs.getLastShard(ctx, user) 314 + if err != nil { 315 + return nil, err 316 + } 317 + 318 + if since != nil && *since != lastShard.Rev { 319 + return nil, fmt.Errorf("revision mismatch: %s != %s: %w", *since, lastShard.Rev, carstore1.ErrRepoBaseMismatch) 320 + } 321 + uv := &userView{ 322 + user: user, 323 + prefetch: true, 324 + cache: make(map[cid.Cid]blockformat.Block), 325 + } 326 + 327 + if lastShard.ID != 0 { 328 + if err := uv.preloadBlocksFromFile(ctx, lastShard.Path); err != nil { 329 + return nil, fmt.Errorf("block prefetch failed: %w", err) 330 + } 331 + } 332 + 333 + return &DeltaSession{ 334 + blks: make(map[cid.Cid]blockformat.Block), 335 + base: uv, 336 + user: user, 337 + baseCid: lastShard.Root.CID, 338 + cs: cs, 339 + seq: lastShard.Seq + 1, 340 + lastRev: lastShard.Rev, 341 + }, nil 342 + } 343 + 344 + func (cs *FileCarStore) ReadOnlySession(user models.Uid) (carstore1.BlockStorage, error) { 345 + return &DeltaSession{ 346 + base: &userView{ 347 + user: user, 348 + prefetch: false, 349 + cache: make(map[cid.Cid]blockformat.Block), 350 + }, 351 + readonly: true, 352 + user: user, 353 + cs: cs, 354 + }, nil 355 + } 356 + 357 + // TODO: incremental is only ever called true, remove the param 358 + func (cs *FileCarStore) ReadUserCar(ctx context.Context, user models.Uid, sinceRev string, incremental bool, shardOut io.Writer) error { 359 + ctx, span 
:= otel.Tracer("carstore").Start(ctx, "ReadUserCar") 360 + defer span.End() 361 + 362 + var earlySeq int 363 + if sinceRev != "" { 364 + var err error 365 + earlySeq, err = cs.meta.SeqForRev(ctx, user, sinceRev) 366 + if err != nil { 367 + return err 368 + } 369 + } 370 + 371 + shards, err := cs.meta.GetUserShardsDesc(ctx, user, earlySeq) 372 + if err != nil { 373 + return err 374 + } 375 + 376 + // TODO: incremental is only ever called true, so this is fine and we can remove the error check 377 + if !incremental && earlySeq > 0 { 378 + // have to do it the ugly way 379 + return fmt.Errorf("nyi") 380 + } 381 + 382 + if len(shards) == 0 { 383 + return fmt.Errorf("no data found for user %d", user) 384 + } 385 + 386 + // fast path! 387 + if err := car.WriteHeader(&car.CarHeader{ 388 + Roots: []cid.Cid{shards[0].Root.CID}, 389 + Version: 1, 390 + }, shardOut); err != nil { 391 + return err 392 + } 393 + 394 + for _, sh := range shards { 395 + if err := cs.writeShardBlocks(ctx, &sh, shardOut); err != nil { 396 + return err 397 + } 398 + } 399 + 400 + return nil 401 + } 402 + 403 + // inner loop part of ReadUserCar 404 + // copy shard blocks from disk to Writer 405 + func (cs *FileCarStore) writeShardBlocks(ctx context.Context, sh *CarShard, shardOut io.Writer) error { 406 + ctx, span := otel.Tracer("carstore").Start(ctx, "writeShardBlocks") 407 + defer span.End() 408 + 409 + fi, err := os.Open(sh.Path) 410 + if err != nil { 411 + return err 412 + } 413 + defer fi.Close() 414 + 415 + _, err = fi.Seek(sh.DataStart, io.SeekStart) 416 + if err != nil { 417 + return err 418 + } 419 + 420 + _, err = io.Copy(shardOut, fi) 421 + if err != nil { 422 + return err 423 + } 424 + 425 + return nil 426 + } 427 + 428 + // inner loop part of compactBucket 429 + func (cs *FileCarStore) iterateShardBlocks(ctx context.Context, sh *CarShard, cb func(blk blockformat.Block) error) error { 430 + fi, err := os.Open(sh.Path) 431 + if err != nil { 432 + return err 433 + } 434 + defer fi.Close() 435 + 436 + rr, err := car.NewCarReader(fi) 437 + if err != nil { 438 + return fmt.Errorf("opening shard car: %w", err) 439 + } 440 + 441 + for { 442 + blk, err := rr.Next() 443 + if err != nil { 444 + if err == io.EOF { 445 + return nil 446 + } 447 + return err 448 + } 449 + 450 + if err := cb(blk); err != nil { 451 + return err 452 + } 453 + } 454 + } 455 + 456 + var _ blockstore.Blockstore = (*DeltaSession)(nil) 457 + 458 + func (ds *DeltaSession) BaseCid() cid.Cid { 459 + return ds.baseCid 460 + } 461 + 462 + func (ds *DeltaSession) Put(ctx context.Context, b blockformat.Block) error { 463 + if ds.readonly { 464 + return fmt.Errorf("cannot write to readonly deltaSession") 465 + } 466 + ds.blks[b.Cid()] = b 467 + return nil 468 + } 469 + 470 + func (ds *DeltaSession) PutMany(ctx context.Context, bs []blockformat.Block) error { 471 + if ds.readonly { 472 + return fmt.Errorf("cannot write to readonly deltaSession") 473 + } 474 + 475 + for _, b := range bs { 476 + ds.blks[b.Cid()] = b 477 + } 478 + return nil 479 + } 480 + 481 + func (ds *DeltaSession) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { 482 + return nil, fmt.Errorf("AllKeysChan not implemented") 483 + } 484 + 485 + func (ds *DeltaSession) DeleteBlock(ctx context.Context, c cid.Cid) error { 486 + if ds.readonly { 487 + return fmt.Errorf("cannot write to readonly deltaSession") 488 + } 489 + 490 + if _, ok := ds.blks[c]; !ok { 491 + return ipld.ErrNotFound{Cid: c} 492 + } 493 + 494 + delete(ds.blks, c) 495 + return nil 496 + } 497 + 498 + func (ds 
*DeltaSession) Get(ctx context.Context, c cid.Cid) (blockformat.Block, error) { 499 + b, ok := ds.blks[c] 500 + if ok { 501 + return b, nil 502 + } 503 + 504 + return ds.base.Get(ctx, c) 505 + } 506 + 507 + func (ds *DeltaSession) Has(ctx context.Context, c cid.Cid) (bool, error) { 508 + _, ok := ds.blks[c] 509 + if ok { 510 + return true, nil 511 + } 512 + 513 + return ds.base.Has(ctx, c) 514 + } 515 + 516 + func (ds *DeltaSession) HashOnRead(hor bool) { 517 + // noop? 518 + } 519 + 520 + func (ds *DeltaSession) GetSize(ctx context.Context, c cid.Cid) (int, error) { 521 + b, ok := ds.blks[c] 522 + if ok { 523 + return len(b.RawData()), nil 524 + } 525 + 526 + return ds.base.GetSize(ctx, c) 527 + } 528 + 529 + func fnameForShard(user models.Uid, seq int) string { 530 + return fmt.Sprintf("sh-%d-%d", user, seq) 531 + } 532 + 533 + func (cs *FileCarStore) dirForUser(user models.Uid) string { 534 + return cs.rootDirs[int(user)%len(cs.rootDirs)] 535 + } 536 + 537 + func (cs *FileCarStore) openNewShardFile(ctx context.Context, user models.Uid, seq int) (*os.File, string, error) { 538 + // TODO: some overwrite protections 539 + fname := filepath.Join(cs.dirForUser(user), fnameForShard(user, seq)) 540 + fi, err := os.Create(fname) 541 + if err != nil { 542 + return nil, "", err 543 + } 544 + 545 + return fi, fname, nil 546 + } 547 + 548 + func (cs *FileCarStore) writeNewShardFile(ctx context.Context, user models.Uid, seq int, data []byte) (string, error) { 549 + _, span := otel.Tracer("carstore").Start(ctx, "writeNewShardFile") 550 + defer span.End() 551 + 552 + // TODO: some overwrite protections 553 + fname := filepath.Join(cs.dirForUser(user), fnameForShard(user, seq)) 554 + if err := os.WriteFile(fname, data, 0664); err != nil { 555 + return "", err 556 + } 557 + 558 + return fname, nil 559 + } 560 + 561 + func (cs *FileCarStore) deleteShardFile(ctx context.Context, sh *CarShard) error { 562 + return os.Remove(sh.Path) 563 + } 564 + 565 + // CloseWithRoot writes all new blocks in a car file to the writer with the 566 + // given cid as the 'root' 567 + func (ds *DeltaSession) CloseWithRoot(ctx context.Context, root cid.Cid, rev string) ([]byte, error) { 568 + ctx, span := otel.Tracer("carstore").Start(ctx, "CloseWithRoot") 569 + defer span.End() 570 + 571 + if ds.readonly { 572 + return nil, fmt.Errorf("cannot write to readonly deltaSession") 573 + } 574 + 575 + return ds.cs.writeNewShard(ctx, root, rev, ds.user, ds.seq, ds.blks, ds.rmcids) 576 + } 577 + 578 + func WriteCarHeader(w io.Writer, root cid.Cid) (int64, error) { 579 + h := &car.CarHeader{ 580 + Roots: []cid.Cid{root}, 581 + Version: 1, 582 + } 583 + hb, err := cbor.DumpObject(h) 584 + if err != nil { 585 + return 0, err 586 + } 587 + 588 + hnw, err := carstore.LdWrite(w, hb) 589 + if err != nil { 590 + return 0, err 591 + } 592 + 593 + return hnw, nil 594 + } 595 + 596 + // shardWriter.writeNewShard called from inside DeltaSession.CloseWithRoot 597 + type shardWriter interface { 598 + // writeNewShard stores blocks in `blks` arg and creates a new shard to propagate out to our firehose 599 + writeNewShard(ctx context.Context, root cid.Cid, rev string, user models.Uid, seq int, blks map[cid.Cid]blockformat.Block, rmcids map[cid.Cid]bool) ([]byte, error) 600 + } 601 + 602 + func blocksToCar(ctx context.Context, root cid.Cid, rev string, blks map[cid.Cid]blockformat.Block) ([]byte, error) { 603 + buf := new(bytes.Buffer) 604 + _, err := WriteCarHeader(buf, root) 605 + if err != nil { 606 + return nil, fmt.Errorf("failed to write car 
header: %w", err) 607 + } 608 + 609 + for k, blk := range blks { 610 + _, err := carstore.LdWrite(buf, k.Bytes(), blk.RawData()) 611 + if err != nil { 612 + return nil, fmt.Errorf("failed to write block: %w", err) 613 + } 614 + } 615 + 616 + return buf.Bytes(), nil 617 + } 618 + 619 + func (cs *FileCarStore) writeNewShard(ctx context.Context, root cid.Cid, rev string, user models.Uid, seq int, blks map[cid.Cid]blockformat.Block, rmcids map[cid.Cid]bool) ([]byte, error) { 620 + 621 + buf := new(bytes.Buffer) 622 + hnw, err := WriteCarHeader(buf, root) 623 + if err != nil { 624 + return nil, fmt.Errorf("failed to write car header: %w", err) 625 + } 626 + 627 + // TODO: writing these blocks in map traversal order is bad, I believe the 628 + // optimal ordering will be something like reverse-write-order, but random 629 + // is definitely not it 630 + 631 + offset := hnw 632 + //brefs := make([]*blockRef, 0, len(ds.blks)) 633 + brefs := make([]map[string]interface{}, 0, len(blks)) 634 + for k, blk := range blks { 635 + nw, err := carstore.LdWrite(buf, k.Bytes(), blk.RawData()) 636 + if err != nil { 637 + return nil, fmt.Errorf("failed to write block: %w", err) 638 + } 639 + 640 + /* 641 + brefs = append(brefs, &blockRef{ 642 + Cid: k.String(), 643 + Offset: offset, 644 + Shard: shard.ID, 645 + }) 646 + */ 647 + // adding things to the db by map is the only way to get gorm to not 648 + // add the 'returning' clause, which costs a lot of time 649 + brefs = append(brefs, map[string]interface{}{ 650 + "cid": models.DbCID{CID: k}, 651 + "offset": offset, 652 + }) 653 + 654 + offset += nw 655 + } 656 + 657 + start := time.Now() 658 + path, err := cs.writeNewShardFile(ctx, user, seq, buf.Bytes()) 659 + if err != nil { 660 + return nil, fmt.Errorf("failed to write shard file: %w", err) 661 + } 662 + writeShardFileDuration.Observe(time.Since(start).Seconds()) 663 + 664 + shard := CarShard{ 665 + Root: models.DbCID{CID: root}, 666 + DataStart: hnw, 667 + Seq: seq, 668 + Path: path, 669 + Usr: user, 670 + Rev: rev, 671 + } 672 + 673 + start = time.Now() 674 + if err := cs.putShard(ctx, &shard, brefs, rmcids, false); err != nil { 675 + return nil, err 676 + } 677 + writeShardMetadataDuration.Observe(time.Since(start).Seconds()) 678 + 679 + return buf.Bytes(), nil 680 + } 681 + 682 + func (cs *FileCarStore) putShard(ctx context.Context, shard *CarShard, brefs []map[string]any, rmcids map[cid.Cid]bool, nocache bool) error { 683 + ctx, span := otel.Tracer("carstore").Start(ctx, "putShard") 684 + defer span.End() 685 + 686 + err := cs.meta.PutShardAndRefs(ctx, shard, brefs, rmcids) 687 + if err != nil { 688 + return err 689 + } 690 + 691 + if !nocache { 692 + cs.putLastShardCache(shard) 693 + } 694 + 695 + return nil 696 + } 697 + 698 + func BlockDiff(ctx context.Context, bs blockstore.Blockstore, oldroot cid.Cid, newcids map[cid.Cid]blockformat.Block, skipcids map[cid.Cid]bool) (map[cid.Cid]bool, error) { 699 + ctx, span := otel.Tracer("repo").Start(ctx, "BlockDiff") 700 + defer span.End() 701 + 702 + if !oldroot.Defined() { 703 + return map[cid.Cid]bool{}, nil 704 + } 705 + 706 + // walk the entire 'new' portion of the tree, marking all referenced cids as 'keep' 707 + keepset := make(map[cid.Cid]bool) 708 + for c := range newcids { 709 + keepset[c] = true 710 + oblk, err := bs.Get(ctx, c) 711 + if err != nil { 712 + return nil, fmt.Errorf("get failed in new tree: %w", err) 713 + } 714 + 715 + if err := cbg.ScanForLinks(bytes.NewReader(oblk.RawData()), func(lnk cid.Cid) { 716 + keepset[lnk] = true 717 + }); 
err != nil { 718 + return nil, err 719 + } 720 + } 721 + 722 + if keepset[oldroot] { 723 + // this should probably never happen, but is technically correct 724 + return nil, nil 725 + } 726 + 727 + // next, walk the old tree from the root, recursing on cids *not* in the keepset. 728 + dropset := make(map[cid.Cid]bool) 729 + dropset[oldroot] = true 730 + queue := []cid.Cid{oldroot} 731 + 732 + for len(queue) > 0 { 733 + c := queue[0] 734 + queue = queue[1:] 735 + 736 + if skipcids != nil && skipcids[c] { 737 + continue 738 + } 739 + 740 + oblk, err := bs.Get(ctx, c) 741 + if err != nil { 742 + return nil, fmt.Errorf("get failed in old tree: %w", err) 743 + } 744 + 745 + if err := cbg.ScanForLinks(bytes.NewReader(oblk.RawData()), func(lnk cid.Cid) { 746 + if lnk.Prefix().Codec != cid.DagCBOR { 747 + return 748 + } 749 + 750 + if !keepset[lnk] { 751 + dropset[lnk] = true 752 + queue = append(queue, lnk) 753 + } 754 + }); err != nil { 755 + return nil, err 756 + } 757 + } 758 + 759 + return dropset, nil 760 + } 761 + 762 + func (cs *FileCarStore) ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, carstore1.BlockStorage, error) { 763 + ctx, span := otel.Tracer("carstore").Start(ctx, "ImportSlice") 764 + defer span.End() 765 + 766 + carr, err := car.NewCarReader(bytes.NewReader(carslice)) 767 + if err != nil { 768 + return cid.Undef, nil, err 769 + } 770 + 771 + if len(carr.Header.Roots) != 1 { 772 + return cid.Undef, nil, fmt.Errorf("invalid car file, header must have a single root (has %d)", len(carr.Header.Roots)) 773 + } 774 + 775 + ds, err := cs.NewDeltaSession(ctx, uid, since) 776 + if err != nil { 777 + return cid.Undef, nil, fmt.Errorf("new delta session failed: %w", err) 778 + } 779 + 780 + var cids []cid.Cid 781 + for { 782 + blk, err := carr.Next() 783 + if err != nil { 784 + if err == io.EOF { 785 + break 786 + } 787 + return cid.Undef, nil, err 788 + } 789 + 790 + cids = append(cids, blk.Cid()) 791 + 792 + if err := ds.Put(ctx, blk); err != nil { 793 + return cid.Undef, nil, err 794 + } 795 + } 796 + 797 + return carr.Header.Roots[0], ds, nil 798 + } 799 + 800 + func (ds *DeltaSession) CalcDiff(ctx context.Context, skipcids map[cid.Cid]bool) error { 801 + rmcids, err := BlockDiff(ctx, ds, ds.baseCid, ds.blks, skipcids) 802 + if err != nil { 803 + return fmt.Errorf("block diff failed (base=%s,rev=%s): %w", ds.baseCid, ds.lastRev, err) 804 + } 805 + 806 + ds.rmcids = rmcids 807 + return nil 808 + } 809 + 810 + func (cs *FileCarStore) GetUserRepoHead(ctx context.Context, user models.Uid) (cid.Cid, error) { 811 + lastShard, err := cs.getLastShard(ctx, user) 812 + if err != nil { 813 + return cid.Undef, err 814 + } 815 + if lastShard.ID == 0 { 816 + return cid.Undef, nil 817 + } 818 + 819 + return lastShard.Root.CID, nil 820 + } 821 + 822 + func (cs *FileCarStore) GetUserRepoRev(ctx context.Context, user models.Uid) (string, error) { 823 + lastShard, err := cs.getLastShard(ctx, user) 824 + if err != nil { 825 + return "", err 826 + } 827 + if lastShard.ID == 0 { 828 + return "", nil 829 + } 830 + 831 + return lastShard.Rev, nil 832 + } 833 + 834 + func (cs *FileCarStore) Stat(ctx context.Context, usr models.Uid) ([]carstore1.UserStat, error) { 835 + shards, err := cs.meta.GetUserShards(ctx, usr) 836 + if err != nil { 837 + return nil, err 838 + } 839 + 840 + var out []carstore1.UserStat 841 + for _, s := range shards { 842 + out = append(out, carstore1.UserStat{ 843 + Seq: s.Seq, 844 + Root: s.Root.CID.String(), 845 + Created: s.CreatedAt, 846 + }) 
847 + } 848 + 849 + return out, nil 850 + } 851 + 852 + func (cs *FileCarStore) WipeUserData(ctx context.Context, user models.Uid) error { 853 + shards, err := cs.meta.GetUserShards(ctx, user) 854 + if err != nil { 855 + return err 856 + } 857 + 858 + if err := cs.deleteShards(ctx, shards); err != nil { 859 + if !os.IsNotExist(err) { 860 + return err 861 + } 862 + } 863 + 864 + cs.removeLastShardCache(user) 865 + 866 + return nil 867 + } 868 + 869 + func (cs *FileCarStore) deleteShards(ctx context.Context, shs []CarShard) error { 870 + ctx, span := otel.Tracer("carstore").Start(ctx, "deleteShards") 871 + defer span.End() 872 + 873 + deleteSlice := func(ctx context.Context, subs []CarShard) error { 874 + ids := make([]uint, len(subs)) 875 + for i, sh := range subs { 876 + ids[i] = sh.ID 877 + } 878 + 879 + err := cs.meta.DeleteShardsAndRefs(ctx, ids) 880 + if err != nil { 881 + return err 882 + } 883 + 884 + for _, sh := range subs { 885 + if err := cs.deleteShardFile(ctx, &sh); err != nil { 886 + if !os.IsNotExist(err) { 887 + return err 888 + } 889 + cs.log.Warn("shard file we tried to delete did not exist", "shard", sh.ID, "path", sh.Path) 890 + } 891 + } 892 + 893 + return nil 894 + } 895 + 896 + chunkSize := 2000 897 + for i := 0; i < len(shs); i += chunkSize { 898 + sl := shs[i:] 899 + if len(sl) > chunkSize { 900 + sl = sl[:chunkSize] 901 + } 902 + 903 + if err := deleteSlice(ctx, sl); err != nil { 904 + return err 905 + } 906 + } 907 + 908 + return nil 909 + } 910 + 911 + type shardStat struct { 912 + ID uint 913 + Dirty int 914 + Seq int 915 + Total int 916 + 917 + refs []blockRef 918 + } 919 + 920 + func (s shardStat) dirtyFrac() float64 { 921 + return float64(s.Dirty) / float64(s.Total) 922 + } 923 + 924 + func aggrRefs(brefs []blockRef, shards map[uint]CarShard, staleCids map[cid.Cid]bool) []shardStat { 925 + byId := make(map[uint]*shardStat) 926 + 927 + for _, br := range brefs { 928 + s, ok := byId[br.Shard] 929 + if !ok { 930 + s = &shardStat{ 931 + ID: br.Shard, 932 + Seq: shards[br.Shard].Seq, 933 + } 934 + byId[br.Shard] = s 935 + } 936 + 937 + s.Total++ 938 + if staleCids[br.Cid.CID] { 939 + s.Dirty++ 940 + } 941 + 942 + s.refs = append(s.refs, br) 943 + } 944 + 945 + var out []shardStat 946 + for _, s := range byId { 947 + out = append(out, *s) 948 + } 949 + 950 + sort.Slice(out, func(i, j int) bool { 951 + return out[i].Seq < out[j].Seq 952 + }) 953 + 954 + return out 955 + } 956 + 957 + type compBucket struct { 958 + shards []shardStat 959 + 960 + cleanBlocks int 961 + expSize int 962 + } 963 + 964 + func (cb *compBucket) shouldCompact() bool { 965 + if len(cb.shards) == 0 { 966 + return false 967 + } 968 + 969 + if len(cb.shards) > 5 { 970 + return true 971 + } 972 + 973 + var frac float64 974 + for _, s := range cb.shards { 975 + frac += s.dirtyFrac() 976 + } 977 + frac /= float64(len(cb.shards)) 978 + 979 + if len(cb.shards) > 3 && frac > 0.2 { 980 + return true 981 + } 982 + 983 + return frac > 0.4 984 + } 985 + 986 + func (cb *compBucket) addShardStat(ss shardStat) { 987 + cb.cleanBlocks += (ss.Total - ss.Dirty) 988 + cb.shards = append(cb.shards, ss) 989 + } 990 + 991 + func (cb *compBucket) isEmpty() bool { 992 + return len(cb.shards) == 0 993 + } 994 + 995 + func (cs *FileCarStore) openNewCompactedShardFile(ctx context.Context, user models.Uid, seq int) (*os.File, string, error) { 996 + // TODO: some overwrite protections 997 + // NOTE CreateTemp is used for creating a non-colliding file, but we keep it and don't delete it so don't think of it as "temporary". 
998 + // This creates "sh-%d-%d%s" with some random stuff in the last position 999 + fi, err := os.CreateTemp(cs.dirForUser(user), fnameForShard(user, seq)) 1000 + if err != nil { 1001 + return nil, "", err 1002 + } 1003 + 1004 + return fi, fi.Name(), nil 1005 + } 1006 + 1007 + type CompactionTarget struct { 1008 + Usr models.Uid 1009 + NumShards int 1010 + } 1011 + 1012 + func (cs *FileCarStore) GetCompactionTargets(ctx context.Context, shardCount int) ([]carstore1.CompactionTarget, error) { 1013 + ctx, span := otel.Tracer("carstore").Start(ctx, "GetCompactionTargets") 1014 + defer span.End() 1015 + 1016 + return cs.meta.GetCompactionTargets(ctx, shardCount) 1017 + } 1018 + 1019 + func shardSize(sh *CarShard) (int64, error) { 1020 + st, err := os.Stat(sh.Path) 1021 + if err != nil { 1022 + if os.IsNotExist(err) { 1023 + slog.Warn("missing shard, return size of zero", "path", sh.Path, "shard", sh.ID, "system", "carstore") 1024 + return 0, nil 1025 + } 1026 + return 0, fmt.Errorf("stat %q: %w", sh.Path, err) 1027 + } 1028 + 1029 + return st.Size(), nil 1030 + } 1031 + 1032 + func (cs *FileCarStore) CompactUserShards(ctx context.Context, user models.Uid, skipBigShards bool) (*carstore1.CompactionStats, error) { 1033 + ctx, span := otel.Tracer("carstore").Start(ctx, "CompactUserShards") 1034 + defer span.End() 1035 + 1036 + span.SetAttributes(attribute.Int64("user", int64(user))) 1037 + 1038 + shards, err := cs.meta.GetUserShards(ctx, user) 1039 + if err != nil { 1040 + return nil, err 1041 + } 1042 + 1043 + _ = shards 1044 + return nil, fmt.Errorf("TODO: have to redo all of compaction") 1045 + } 1046 + 1047 + func (cs *FileCarStore) deleteStaleRefs(ctx context.Context, uid models.Uid, brefs []blockRef, staleRefs []staleRef, removedShards map[uint]bool) error { 1048 + ctx, span := otel.Tracer("carstore").Start(ctx, "deleteStaleRefs") 1049 + defer span.End() 1050 + 1051 + brByCid := make(map[cid.Cid][]blockRef) 1052 + for _, br := range brefs { 1053 + brByCid[br.Cid.CID] = append(brByCid[br.Cid.CID], br) 1054 + } 1055 + 1056 + var staleToKeep []cid.Cid 1057 + for _, sr := range staleRefs { 1058 + cids, err := sr.getCids() 1059 + if err != nil { 1060 + return fmt.Errorf("getCids on staleRef failed (%d): %w", sr.ID, err) 1061 + } 1062 + 1063 + for _, c := range cids { 1064 + brs := brByCid[c] 1065 + del := true 1066 + for _, br := range brs { 1067 + if !removedShards[br.Shard] { 1068 + del = false 1069 + break 1070 + } 1071 + } 1072 + 1073 + if !del { 1074 + staleToKeep = append(staleToKeep, c) 1075 + } 1076 + } 1077 + } 1078 + 1079 + return cs.meta.SetStaleRef(ctx, uid, staleToKeep) 1080 + } 1081 + 1082 + func (cs *FileCarStore) compactBucket(ctx context.Context, user models.Uid, b *compBucket, shardsById map[uint]CarShard, keep map[cid.Cid]bool) error { 1083 + ctx, span := otel.Tracer("carstore").Start(ctx, "compactBucket") 1084 + defer span.End() 1085 + 1086 + span.SetAttributes(attribute.Int("shards", len(b.shards))) 1087 + 1088 + last := b.shards[len(b.shards)-1] 1089 + lastsh := shardsById[last.ID] 1090 + fi, path, err := cs.openNewCompactedShardFile(ctx, user, last.Seq) 1091 + if err != nil { 1092 + return fmt.Errorf("opening new file: %w", err) 1093 + } 1094 + 1095 + defer fi.Close() 1096 + root := lastsh.Root.CID 1097 + 1098 + hnw, err := WriteCarHeader(fi, root) 1099 + if err != nil { 1100 + return err 1101 + } 1102 + 1103 + offset := hnw 1104 + var nbrefs []map[string]any 1105 + written := make(map[cid.Cid]bool) 1106 + for _, s := range b.shards { 1107 + sh := shardsById[s.ID] 1108 + 
if err := cs.iterateShardBlocks(ctx, &sh, func(blk blockformat.Block) error {
1109 + if written[blk.Cid()] {
1110 + return nil
1111 + }
1112 + 
1113 + if keep[blk.Cid()] {
1114 + nw, err := carstore.LdWrite(fi, blk.Cid().Bytes(), blk.RawData())
1115 + if err != nil {
1116 + return fmt.Errorf("failed to write block: %w", err)
1117 + }
1118 + 
1119 + nbrefs = append(nbrefs, map[string]any{
1120 + "cid": models.DbCID{CID: blk.Cid()},
1121 + "offset": offset,
1122 + })
1123 + 
1124 + offset += nw
1125 + written[blk.Cid()] = true
1126 + }
1127 + return nil
1128 + }); err != nil {
1129 + // If we ever fail to iterate a shard file because it's
1130 + // corrupted, just log an error and skip the shard
1131 + cs.log.Error("iterating blocks in shard", "shard", s.ID, "err", err, "uid", user)
1132 + }
1133 + }
1134 + 
1135 + shard := CarShard{
1136 + Root: models.DbCID{CID: root},
1137 + DataStart: hnw,
1138 + Seq: lastsh.Seq,
1139 + Path: path,
1140 + Usr: user,
1141 + Rev: lastsh.Rev,
1142 + }
1143 + 
1144 + if err := cs.putShard(ctx, &shard, nbrefs, nil, true); err != nil {
1145 + // if writing the shard fails, we should also delete the file
1146 + _ = fi.Close()
1147 + 
1148 + if err2 := os.Remove(fi.Name()); err2 != nil {
1149 + cs.log.Error("failed to remove shard file after failed db transaction", "path", fi.Name(), "err", err2)
1150 + }
1151 + 
1152 + return err
1153 + }
1154 + return nil
1155 + }
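A note on the compaction heuristic above: shouldCompact encodes three thresholds, always compact past five shards, compact past three shards once the average dirty fraction exceeds 0.2, and otherwise require 0.4. Here is a standalone sketch of that same decision table (the shardStat type below is a local stand-in for illustration, not an import from this package):

package main

import "fmt"

// shardStat mirrors the Dirty/Total counters tracked per shard above.
type shardStat struct{ Dirty, Total int }

func shouldCompact(shards []shardStat) bool {
	if len(shards) == 0 {
		return false
	}
	if len(shards) > 5 {
		return true // many small shards: always worth merging
	}
	var frac float64
	for _, s := range shards {
		frac += float64(s.Dirty) / float64(s.Total)
	}
	frac /= float64(len(shards))
	if len(shards) > 3 && frac > 0.2 {
		return true // several shards, moderately dirty
	}
	return frac > 0.4 // few shards: only compact when quite dirty
}

func main() {
	// four shards, each 25% stale: compacts via the >3-shards rule
	fmt.Println(shouldCompact([]shardStat{{25, 100}, {25, 100}, {25, 100}, {25, 100}}))
}

Running it prints true, since four shards at an average dirty fraction of 0.25 clear the three-shard rule.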
+71
repostore/last_shard_cache.go
··· 1 + package repostore 2 + 3 + import ( 4 + "context" 5 + "sync" 6 + 7 + "github.com/bluesky-social/indigo/models" 8 + "go.opentelemetry.io/otel" 9 + ) 10 + 11 + type LastShardSource interface { 12 + GetLastShard(context.Context, models.Uid) (*CarShard, error) 13 + } 14 + 15 + type lastShardCache struct { 16 + source LastShardSource 17 + 18 + lscLk sync.Mutex 19 + lastShardCache map[models.Uid]*CarShard 20 + } 21 + 22 + func (lsc *lastShardCache) Init() { 23 + lsc.lastShardCache = make(map[models.Uid]*CarShard) 24 + } 25 + 26 + func (lsc *lastShardCache) check(user models.Uid) *CarShard { 27 + lsc.lscLk.Lock() 28 + defer lsc.lscLk.Unlock() 29 + 30 + ls, ok := lsc.lastShardCache[user] 31 + if ok { 32 + return ls 33 + } 34 + 35 + return nil 36 + } 37 + 38 + func (lsc *lastShardCache) remove(user models.Uid) { 39 + lsc.lscLk.Lock() 40 + defer lsc.lscLk.Unlock() 41 + 42 + delete(lsc.lastShardCache, user) 43 + } 44 + 45 + func (lsc *lastShardCache) put(ls *CarShard) { 46 + if ls == nil { 47 + return 48 + } 49 + lsc.lscLk.Lock() 50 + defer lsc.lscLk.Unlock() 51 + 52 + lsc.lastShardCache[ls.Usr] = ls 53 + } 54 + 55 + func (lsc *lastShardCache) get(ctx context.Context, user models.Uid) (*CarShard, error) { 56 + ctx, span := otel.Tracer("carstore").Start(ctx, "getLastShard") 57 + defer span.End() 58 + 59 + maybeLs := lsc.check(user) 60 + if maybeLs != nil { 61 + return maybeLs, nil 62 + } 63 + 64 + lastShard, err := lsc.source.GetLastShard(ctx, user) 65 + if err != nil { 66 + return nil, err 67 + } 68 + 69 + lsc.put(lastShard) 70 + return lastShard, nil 71 + }
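The type above is a plain read-through cache: check consults the map under lscLk, and a miss in get falls through to the backing LastShardSource before being stored with put. A rough in-package usage sketch, assuming the file's existing imports; stubSource is hypothetical and only for illustration:

// stubSource is a hypothetical LastShardSource; a real one would query the
// shard metadata database.
type stubSource struct{}

func (stubSource) GetLastShard(ctx context.Context, uid models.Uid) (*CarShard, error) {
	return &CarShard{Usr: uid}, nil
}

func exampleLastShardCache(ctx context.Context) error {
	lsc := &lastShardCache{source: stubSource{}}
	lsc.Init()

	// first call misses, hits the source, and populates the map
	if _, err := lsc.get(ctx, 1); err != nil {
		return err
	}

	// second call is served straight from the cache
	_, err := lsc.get(ctx, 1)
	return err
}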
+262
repostore/meta_gorm.go
··· 1 + package repostore
2 + 
3 + import (
4 + "bytes"
5 + "context"
6 + "fmt"
7 + "io"
8 + "strconv"
9 + "strings"
10 + "time"
11 + 
12 + carstore "github.com/bluesky-social/indigo/carstore"
13 + "github.com/bluesky-social/indigo/models"
14 + "github.com/ipfs/go-cid"
15 + "go.opentelemetry.io/otel"
16 + "gorm.io/gorm"
17 + )
18 + 
19 + type CarStoreGormMeta struct {
20 + meta *gorm.DB
21 + }
22 + 
23 + func (cs *CarStoreGormMeta) Init() error {
24 + if err := cs.meta.AutoMigrate(&CarShard{}, &blockRef{}); err != nil {
25 + return err
26 + }
27 + if err := cs.meta.AutoMigrate(&staleRef{}); err != nil {
28 + return err
29 + }
30 + return nil
31 + }
32 + 
33 + func (cs *CarStoreGormMeta) GetLastShard(ctx context.Context, user models.Uid) (*CarShard, error) {
34 + var lastShard CarShard
35 + if err := cs.meta.WithContext(ctx).Model(CarShard{}).Limit(1).Order("seq desc").Find(&lastShard, "usr = ?", user).Error; err != nil {
36 + return nil, err
37 + }
38 + return &lastShard, nil
39 + }
40 + 
41 + // return all of a user's shards, ascending by Seq
42 + func (cs *CarStoreGormMeta) GetUserShards(ctx context.Context, usr models.Uid) ([]CarShard, error) {
43 + var shards []CarShard
44 + if err := cs.meta.WithContext(ctx).Order("seq asc").Find(&shards, "usr = ?", usr).Error; err != nil {
45 + return nil, err
46 + }
47 + return shards, nil
48 + }
49 + 
50 + // return all of a user's shards, descending by Seq
51 + func (cs *CarStoreGormMeta) GetUserShardsDesc(ctx context.Context, usr models.Uid, minSeq int) ([]CarShard, error) {
52 + var shards []CarShard
53 + if err := cs.meta.WithContext(ctx).Order("seq desc").Find(&shards, "usr = ? AND seq >= ?", usr, minSeq).Error; err != nil {
54 + return nil, err
55 + }
56 + return shards, nil
57 + }
58 + 
59 + func (cs *CarStoreGormMeta) GetUserStaleRefs(ctx context.Context, user models.Uid) ([]staleRef, error) {
60 + var staleRefs []staleRef
61 + if err := cs.meta.WithContext(ctx).Find(&staleRefs, "usr = ?", user).Error; err != nil {
62 + return nil, err
63 + }
64 + return staleRefs, nil
65 + }
66 + 
67 + func (cs *CarStoreGormMeta) SeqForRev(ctx context.Context, user models.Uid, sinceRev string) (int, error) {
68 + var untilShard CarShard
69 + if err := cs.meta.WithContext(ctx).Where("rev >= ? AND usr = ?", sinceRev, user).Order("rev").First(&untilShard).Error; err != nil {
70 + return 0, fmt.Errorf("finding early shard: %w", err)
71 + }
72 + return untilShard.Seq, nil
73 + }
74 + 
75 + func (cs *CarStoreGormMeta) GetCompactionTargets(ctx context.Context, minShardCount int) ([]carstore.CompactionTarget, error) {
76 + var targets []carstore.CompactionTarget
77 + if err := cs.meta.WithContext(ctx).Raw(`select usr, count(*) as num_shards from car_shards group by usr having count(*) > ? 
order by num_shards desc`, minShardCount).Scan(&targets).Error; err != nil {
78 + return nil, err
79 + }
80 + 
81 + return targets, nil
82 + }
83 + 
84 + func (cs *CarStoreGormMeta) PutShardAndRefs(ctx context.Context, shard *CarShard, brefs []map[string]any, rmcids map[cid.Cid]bool) error {
85 + // TODO: there should be a way to create the shard and block_refs that
86 + // reference it in the same query, would save a lot of time
87 + tx := cs.meta.WithContext(ctx).Begin()
88 + 
89 + if err := tx.Create(shard).Error; err != nil {
+ tx.Rollback()
90 + return fmt.Errorf("failed to create shard in DB tx: %w", err)
91 + }
92 + 
+ // the shard row now has an ID; stamp it onto each block ref, then insert
+ // them in the same transaction (otherwise brefs would never reach the DB)
+ for _, bref := range brefs {
+ bref["shard"] = shard.ID
+ }
+ if err := createBlockRefs(ctx, tx, brefs); err != nil {
+ tx.Rollback()
+ return fmt.Errorf("failed to create block refs: %w", err)
+ }
+ 
93 + if len(rmcids) > 0 {
94 + cids := make([]cid.Cid, 0, len(rmcids))
95 + for c := range rmcids {
96 + cids = append(cids, c)
97 + }
98 + 
99 + if err := tx.Create(&staleRef{
100 + Cids: packCids(cids),
101 + Usr: shard.Usr,
102 + }).Error; err != nil {
+ tx.Rollback()
103 + return err
104 + }
105 + }
106 + 
107 + err := tx.Commit().Error
108 + if err != nil {
109 + return fmt.Errorf("failed to commit shard DB transaction: %w", err)
110 + }
111 + return nil
112 + }
113 + 
114 + func (cs *CarStoreGormMeta) DeleteShardsAndRefs(ctx context.Context, ids []uint) error {
+ txn := cs.meta.WithContext(ctx).Begin()
115 + 
116 + if err := txn.Delete(&CarShard{}, "id in (?)", ids).Error; err != nil {
+ txn.Rollback()
117 + return err
118 + }
119 + 
+ // block refs point at these shards by ID; drop them in the same transaction
+ if err := txn.Delete(&blockRef{}, "shard in (?)", ids).Error; err != nil {
+ txn.Rollback()
+ return err
+ }
+ 
120 + return txn.Commit().Error
121 + }
122 + 
123 + // valuesStatementForShards builds a Postgres-compatible VALUES list from int literals
124 + func valuesStatementForShards(shards []uint) string {
125 + sb := new(strings.Builder)
126 + for i, v := range shards {
127 + sb.WriteByte('(')
128 + sb.WriteString(strconv.Itoa(int(v)))
129 + sb.WriteByte(')')
130 + if i != len(shards)-1 {
131 + sb.WriteByte(',')
132 + }
133 + }
134 + return sb.String()
135 + }
136 + 
137 + func (cs *CarStoreGormMeta) SetStaleRef(ctx context.Context, uid models.Uid, staleToKeep []cid.Cid) error {
138 + txn := cs.meta.WithContext(ctx).Begin()
139 + 
140 + if err := txn.Delete(&staleRef{}, "usr = ?", uid).Error; err != nil {
+ txn.Rollback()
141 + return err
142 + }
143 + 
144 + // now create a new staleRef with all the refs we couldn't clear out
145 + if len(staleToKeep) > 0 {
146 + if err := txn.Create(&staleRef{
147 + Usr: uid,
148 + Cids: packCids(staleToKeep),
149 + }).Error; err != nil {
+ txn.Rollback()
150 + return err
151 + }
152 + }
153 + 
154 + if err := txn.Commit().Error; err != nil {
155 + return fmt.Errorf("failed to commit staleRef updates: %w", err)
156 + }
157 + return nil
158 + }
159 + 
160 + type CarShard struct {
161 + ID uint `gorm:"primarykey"`
162 + CreatedAt time.Time
163 + 
164 + Root models.DbCID `gorm:"index"`
165 + DataStart int64
166 + Seq int `gorm:"index:idx_car_shards_seq;index:idx_car_shards_usr_seq,priority:2,sort:desc"`
167 + Path string
168 + Usr models.Uid `gorm:"index:idx_car_shards_usr;index:idx_car_shards_usr_seq,priority:1"`
169 + Rev string
170 + }
171 + 
172 + type blockRef struct {
173 + ID uint `gorm:"primarykey"`
174 + Cid models.DbCID `gorm:"index"`
175 + Shard uint `gorm:"index"`
176 + Offset int64
177 + //User uint `gorm:"index"`
178 + }
179 + 
180 + type staleRef struct {
181 + ID uint `gorm:"primarykey"`
182 + Cid *models.DbCID
183 + Cids []byte
184 + Usr models.Uid `gorm:"index"`
185 + }
186 + 
187 + func (sr *staleRef) getCids() ([]cid.Cid, error) {
188 + if sr.Cid != nil {
189 + return []cid.Cid{sr.Cid.CID}, nil
190 + }
191 + 
192 + return unpackCids(sr.Cids)
193 + }
194 + 
195 + func unpackCids(b []byte) ([]cid.Cid, error) {
196 + br := bytes.NewReader(b)
197 + var out []cid.Cid
198 + for {
199 + _, c, err := cid.CidFromReader(br)
200 + 
if err != nil {
201 + if err == io.EOF {
202 + break
203 + }
204 + return nil, err
205 + }
206 + 
207 + out = append(out, c)
208 + }
209 + 
210 + return out, nil
211 + }
212 + 
213 + func packCids(cids []cid.Cid) []byte {
214 + buf := new(bytes.Buffer)
215 + for _, c := range cids {
216 + buf.Write(c.Bytes())
217 + }
218 + 
219 + return buf.Bytes()
220 + }
221 + 
222 + func createBlockRefs(ctx context.Context, tx *gorm.DB, brefs []map[string]any) error {
223 + ctx, span := otel.Tracer("carstore").Start(ctx, "createBlockRefs")
224 + defer span.End()
225 + 
226 + if err := createInBatches(ctx, tx, brefs, 2000); err != nil {
227 + return err
228 + }
229 + 
230 + return nil
231 + }
232 + 
233 + // createInBatches inserts brefs in chunks of batchSize, keeping each generated INSERT statement to a manageable size
234 + func createInBatches(ctx context.Context, tx *gorm.DB, brefs []map[string]any, batchSize int) error {
235 + for i := 0; i < len(brefs); i += batchSize {
236 + batch := brefs[i:]
237 + if len(batch) > batchSize {
238 + batch = batch[:batchSize]
239 + }
240 + 
241 + query, values := generateInsertQuery(batch)
242 + 
243 + if err := tx.WithContext(ctx).Exec(query, values...).Error; err != nil {
244 + return err
245 + }
246 + }
247 + return nil
248 + }
249 + 
250 + func generateInsertQuery(brefs []map[string]any) (string, []any) {
251 + placeholders := strings.Repeat("(?, ?, ?),", len(brefs))
252 + placeholders = placeholders[:len(placeholders)-1] // trim trailing comma
253 + 
254 + query := "INSERT INTO block_refs (\"cid\", \"offset\", \"shard\") VALUES " + placeholders
255 + 
256 + values := make([]any, 0, 3*len(brefs))
257 + for _, entry := range brefs {
258 + values = append(values, entry["cid"], entry["offset"], entry["shard"])
259 + }
260 + 
261 + return query, values
262 + }
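Worth noting: packCids and unpackCids work because CIDs are self-delimiting, so the stored blob is just raw CID bytes concatenated with no length prefixes, and cid.CidFromReader consumes exactly one CID per call until EOF. A small in-package round-trip sketch (it builds an arbitrary dag-cbor CID via go-multihash, a dependency go-cid already pulls in):

// assumes an added import of multihash "github.com/multiformats/go-multihash"
func exampleCidRoundTrip() error {
	// build a deterministic dag-cbor CID from arbitrary bytes
	h, err := multihash.Sum([]byte("repostore example"), multihash.SHA2_256, -1)
	if err != nil {
		return err
	}
	c := cid.NewCidV1(cid.DagCBOR, h)

	// two copies in, two copies out, byte-for-byte equal
	got, err := unpackCids(packCids([]cid.Cid{c, c}))
	if err != nil {
		return err
	}
	if len(got) != 2 || !got[0].Equals(c) || !got[1].Equals(c) {
		return fmt.Errorf("round trip mismatch: %v", got)
	}
	return nil
}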
+18
repostore/metrics.go
··· 1 + package repostore 2 + 3 + import ( 4 + "github.com/prometheus/client_golang/prometheus" 5 + "github.com/prometheus/client_golang/prometheus/promauto" 6 + ) 7 + 8 + var writeShardFileDuration = promauto.NewHistogram(prometheus.HistogramOpts{ 9 + Name: "repostore_write_shard_file_duration", 10 + Help: "Duration of writing shard file to disk", 11 + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), 12 + }) 13 + 14 + var writeShardMetadataDuration = promauto.NewHistogram(prometheus.HistogramOpts{ 15 + Name: "repostore_write_shard_metadata_duration", 16 + Help: "Duration of writing shard metadata to DB", 17 + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), 18 + })
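These two histograms split a shard write into its disk half and its database half. The call pattern they imply is to time each stage and record seconds; here is a hedged sketch, where writeFile and writeMeta are hypothetical stand-ins for the real write paths:

// assumes the "time" import; writeFile/writeMeta are placeholders, not
// functions defined in this diff
func writeShardObserved(writeFile, writeMeta func() error) error {
	start := time.Now()
	if err := writeFile(); err != nil {
		return err
	}
	writeShardFileDuration.Observe(time.Since(start).Seconds())

	start = time.Now()
	if err := writeMeta(); err != nil {
		return err
	}
	writeShardMetadataDuration.Observe(time.Since(start).Seconds())
	return nil
}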
+633
repostore/repo_test.go
··· 1 + package repostore
2 + 
3 + import (
4 + "bytes"
5 + "context"
6 + "errors"
7 + "fmt"
8 + "io"
9 + "log/slog"
10 + "os"
11 + "path/filepath"
12 + "testing"
13 + "time"
14 + 
15 + "github.com/bluesky-social/indigo/api/bsky"
16 + appbsky "github.com/bluesky-social/indigo/api/bsky"
17 + carstore "github.com/bluesky-social/indigo/carstore"
18 + "github.com/bluesky-social/indigo/repo"
19 + "github.com/bluesky-social/indigo/util"
20 + 
21 + //sqlbs "github.com/ipfs/go-bs-sqlite3"
22 + "github.com/ipfs/go-cid"
23 + flatfs "github.com/ipfs/go-ds-flatfs"
24 + blockstore "github.com/ipfs/go-ipfs-blockstore"
25 + ipld "github.com/ipfs/go-ipld-format"
26 + "gorm.io/driver/sqlite"
27 + "gorm.io/gorm"
28 + )
29 + 
30 + func testCarStore(t testing.TB) (CarStore, func(), error) {
31 + tempdir, err := os.MkdirTemp("", "msttest-")
32 + if err != nil {
33 + return nil, nil, err
34 + }
35 + 
36 + sharddir1 := filepath.Join(tempdir, "shards1")
37 + if err := os.MkdirAll(sharddir1, 0775); err != nil {
38 + return nil, nil, err
39 + }
40 + 
41 + sharddir2 := filepath.Join(tempdir, "shards2")
42 + if err := os.MkdirAll(sharddir2, 0775); err != nil {
43 + return nil, nil, err
44 + }
45 + 
46 + dbstr := "file::memory:"
47 + //dbstr := filepath.Join(tempdir, "foo.sqlite")
48 + db, err := gorm.Open(sqlite.Open(dbstr),
49 + &gorm.Config{
50 + SkipDefaultTransaction: true,
51 + })
52 + if err != nil {
53 + return nil, nil, err
54 + }
55 + 
56 + cs, err := NewCarStore(db, []string{sharddir1, sharddir2})
57 + if err != nil {
58 + return nil, nil, err
59 + }
60 + 
61 + return cs, func() {
62 + _ = os.RemoveAll(tempdir)
63 + }, nil
64 + }
65 + 
66 + type testFactory func(t testing.TB) (CarStore, func(), error)
67 + 
68 + var backends = map[string]testFactory{
69 + "carstore": testCarStore,
70 + }
71 + 
72 + func testFlatfsBs() (blockstore.Blockstore, func(), error) {
73 + tempdir, err := os.MkdirTemp("", "msttest-")
74 + if err != nil {
75 + return nil, nil, err
76 + }
77 + 
78 + ffds, err := flatfs.CreateOrOpen(tempdir, flatfs.IPFS_DEF_SHARD, false)
79 + if err != nil {
80 + return nil, nil, err
81 + }
82 + 
83 + bs := blockstore.NewBlockstoreNoPrefix(ffds)
84 + 
85 + return bs, func() {
86 + _ = os.RemoveAll(tempdir)
87 + }, nil
88 + }
89 + 
90 + func TestBasicOperation(ot *testing.T) {
91 + ctx := context.TODO()
92 + 
93 + for fname, tf := range backends {
94 + ot.Run(fname, func(t *testing.T) {
95 + 
96 + cs, cleanup, err := tf(t)
97 + if err != nil {
98 + t.Fatal(err)
99 + }
100 + defer cleanup()
101 + 
102 + ds, err := cs.NewDeltaSession(ctx, 1, nil)
103 + if err != nil {
104 + t.Fatal(err)
105 + }
106 + 
107 + ncid, rev, err := setupRepo(ctx, ds, false)
108 + if err != nil {
109 + t.Fatal(err)
110 + }
111 + 
112 + if _, err := ds.CloseWithRoot(ctx, ncid, rev); err != nil {
113 + t.Fatal(err)
114 + }
115 + 
116 + var recs []cid.Cid
117 + head := ncid
118 + for i := 0; i < 10; i++ {
119 + ds, err := cs.NewDeltaSession(ctx, 1, &rev)
120 + if err != nil {
121 + t.Fatal(err)
122 + }
123 + 
124 + rr, err := repo.OpenRepo(ctx, ds, head)
125 + if err != nil {
126 + t.Fatal(err)
127 + }
128 + 
129 + rc, _, err := rr.CreateRecord(ctx, "app.bsky.feed.post", &appbsky.FeedPost{
130 + Text: fmt.Sprintf("hey look its a tweet %d", time.Now().UnixNano()),
131 + })
132 + if err != nil {
133 + t.Fatal(err)
134 + }
135 + 
136 + recs = append(recs, rc)
137 + 
138 + kmgr := &util.FakeKeyManager{}
139 + nroot, nrev, err := rr.Commit(ctx, kmgr.SignForUser)
140 + if err != nil {
141 + t.Fatal(err)
142 + }
143 + 
144 + rev = nrev
145 + 
146 + if err := 
ds.CalcDiff(ctx, nil); err != nil { 147 + t.Fatal(err) 148 + } 149 + 150 + if _, err := ds.CloseWithRoot(ctx, nroot, rev); err != nil { 151 + t.Fatal(err) 152 + } 153 + 154 + head = nroot 155 + } 156 + 157 + buf := new(bytes.Buffer) 158 + if err := cs.ReadUserCar(ctx, 1, "", true, buf); err != nil { 159 + t.Fatal(err) 160 + } 161 + checkRepo(t, cs, buf, recs) 162 + 163 + if _, err := cs.CompactUserShards(ctx, 1, false); err != nil { 164 + t.Log(err) 165 + // TODO: 166 + //t.Fatal(err) 167 + } 168 + 169 + buf = new(bytes.Buffer) 170 + if err := cs.ReadUserCar(ctx, 1, "", true, buf); err != nil { 171 + t.Fatal(err) 172 + } 173 + checkRepo(t, cs, buf, recs) 174 + }) 175 + } 176 + } 177 + 178 + func TestRepeatedCompactions(t *testing.T) { 179 + ctx := context.TODO() 180 + 181 + cs, cleanup, err := testCarStore(t) 182 + if err != nil { 183 + t.Fatal(err) 184 + } 185 + defer cleanup() 186 + 187 + ds, err := cs.NewDeltaSession(ctx, 1, nil) 188 + if err != nil { 189 + t.Fatal(err) 190 + } 191 + 192 + ncid, rev, err := setupRepo(ctx, ds, false) 193 + if err != nil { 194 + t.Fatal(err) 195 + } 196 + 197 + if _, err := ds.CloseWithRoot(ctx, ncid, rev); err != nil { 198 + t.Fatal(err) 199 + } 200 + 201 + var recs []cid.Cid 202 + head := ncid 203 + 204 + var lastRec string 205 + 206 + for loop := 0; loop < 50; loop++ { 207 + for i := 0; i < 20; i++ { 208 + ds, err := cs.NewDeltaSession(ctx, 1, &rev) 209 + if err != nil { 210 + t.Fatal(err) 211 + } 212 + 213 + rr, err := repo.OpenRepo(ctx, ds, head) 214 + if err != nil { 215 + t.Fatal(err) 216 + } 217 + if i%4 == 3 { 218 + if err := rr.DeleteRecord(ctx, lastRec); err != nil { 219 + t.Fatal(err) 220 + } 221 + recs = recs[:len(recs)-1] 222 + } else { 223 + rc, tid, err := rr.CreateRecord(ctx, "app.bsky.feed.post", &appbsky.FeedPost{ 224 + Text: fmt.Sprintf("hey look its a tweet %d", time.Now().UnixNano()), 225 + }) 226 + if err != nil { 227 + t.Fatal(err) 228 + } 229 + 230 + recs = append(recs, rc) 231 + lastRec = "app.bsky.feed.post/" + tid 232 + } 233 + 234 + kmgr := &util.FakeKeyManager{} 235 + nroot, nrev, err := rr.Commit(ctx, kmgr.SignForUser) 236 + if err != nil { 237 + t.Fatal(err) 238 + } 239 + 240 + rev = nrev 241 + 242 + if err := ds.CalcDiff(ctx, nil); err != nil { 243 + t.Fatal(err) 244 + } 245 + 246 + if _, err := ds.CloseWithRoot(ctx, nroot, rev); err != nil { 247 + t.Fatal(err) 248 + } 249 + 250 + head = nroot 251 + } 252 + fmt.Println("Run compaction", loop) 253 + st, err := cs.CompactUserShards(ctx, 1, false) 254 + if err != nil { 255 + t.Log(err) 256 + // TODO: 257 + //t.Fatal(err) 258 + } 259 + 260 + fmt.Printf("%#v\n", st) 261 + 262 + buf := new(bytes.Buffer) 263 + if err := cs.ReadUserCar(ctx, 1, "", true, buf); err != nil { 264 + t.Fatal(err) 265 + } 266 + checkRepo(t, cs, buf, recs) 267 + } 268 + 269 + buf := new(bytes.Buffer) 270 + if err := cs.ReadUserCar(ctx, 1, "", true, buf); err != nil { 271 + t.Fatal(err) 272 + } 273 + checkRepo(t, cs, buf, recs) 274 + } 275 + 276 + func checkRepo(t *testing.T, cs CarStore, r io.Reader, expRecs []cid.Cid) { 277 + t.Helper() 278 + rep, err := repo.ReadRepoFromCar(context.TODO(), r) 279 + if err != nil { 280 + t.Fatal("Reading repo: ", err) 281 + } 282 + 283 + set := make(map[cid.Cid]bool) 284 + for _, c := range expRecs { 285 + set[c] = true 286 + } 287 + 288 + if err := rep.ForEach(context.TODO(), "", func(k string, v cid.Cid) error { 289 + if !set[v] { 290 + return fmt.Errorf("have record we did not expect") 291 + } 292 + 293 + delete(set, v) 294 + return nil 295 + 296 + }); err != nil { 
297 + var ierr ipld.ErrNotFound 298 + if errors.As(err, &ierr) { 299 + fmt.Println("matched error") 300 + bs, err := cs.ReadOnlySession(1) 301 + if err != nil { 302 + fmt.Println("could not read session: ", err) 303 + } 304 + 305 + blk, err := bs.Get(context.TODO(), ierr.Cid) 306 + if err != nil { 307 + fmt.Println("also failed the local get: ", err) 308 + } else { 309 + fmt.Println("LOCAL GET SUCCESS", len(blk.RawData())) 310 + } 311 + } 312 + 313 + t.Fatal("walking repo: ", err) 314 + } 315 + 316 + if len(set) > 0 { 317 + t.Fatalf("expected to find more cids in repo: %v", set) 318 + } 319 + 320 + } 321 + 322 + func setupRepo(ctx context.Context, bs blockstore.Blockstore, mkprofile bool) (cid.Cid, string, error) { 323 + nr := repo.NewRepo(ctx, "did:foo", bs) 324 + 325 + if mkprofile { 326 + _, err := nr.PutRecord(ctx, "app.bsky.actor.profile/self", &bsky.ActorProfile{}) 327 + if err != nil { 328 + return cid.Undef, "", fmt.Errorf("write record failed: %w", err) 329 + } 330 + } 331 + 332 + kmgr := &util.FakeKeyManager{} 333 + ncid, rev, err := nr.Commit(ctx, kmgr.SignForUser) 334 + if err != nil { 335 + return cid.Undef, "", fmt.Errorf("commit failed: %w", err) 336 + } 337 + 338 + return ncid, rev, nil 339 + } 340 + 341 + func BenchmarkRepoWritesCarstore(b *testing.B) { 342 + ctx := context.TODO() 343 + 344 + cs, cleanup, err := testCarStore(b) 345 + innerBenchmarkRepoWritesCarstore(b, ctx, cs, cleanup, err) 346 + } 347 + 348 + func innerBenchmarkRepoWritesCarstore(b *testing.B, ctx context.Context, cs CarStore, cleanup func(), err error) { 349 + if err != nil { 350 + b.Fatal(err) 351 + } 352 + defer cleanup() 353 + 354 + ds, err := cs.NewDeltaSession(ctx, 1, nil) 355 + if err != nil { 356 + b.Fatal(err) 357 + } 358 + 359 + ncid, rev, err := setupRepo(ctx, ds, false) 360 + if err != nil { 361 + b.Fatal(err) 362 + } 363 + 364 + if _, err := ds.CloseWithRoot(ctx, ncid, rev); err != nil { 365 + b.Fatal(err) 366 + } 367 + 368 + head := ncid 369 + b.ResetTimer() 370 + for i := 0; i < b.N; i++ { 371 + ds, err := cs.NewDeltaSession(ctx, 1, &rev) 372 + if err != nil { 373 + b.Fatal(err) 374 + } 375 + 376 + rr, err := repo.OpenRepo(ctx, ds, head) 377 + if err != nil { 378 + b.Fatal(err) 379 + } 380 + 381 + if _, _, err := rr.CreateRecord(ctx, "app.bsky.feed.post", &appbsky.FeedPost{ 382 + Text: fmt.Sprintf("hey look its a tweet %s", time.Now()), 383 + }); err != nil { 384 + b.Fatal(err) 385 + } 386 + 387 + kmgr := &util.FakeKeyManager{} 388 + nroot, nrev, err := rr.Commit(ctx, kmgr.SignForUser) 389 + if err != nil { 390 + b.Fatal(err) 391 + } 392 + 393 + rev = nrev 394 + if err := ds.CalcDiff(ctx, nil); err != nil { 395 + b.Fatal(err) 396 + } 397 + 398 + if _, err := ds.CloseWithRoot(ctx, nroot, rev); err != nil { 399 + b.Fatal(err) 400 + } 401 + 402 + head = nroot 403 + } 404 + } 405 + 406 + func BenchmarkRepoWritesFlatfs(b *testing.B) { 407 + ctx := context.TODO() 408 + 409 + bs, cleanup, err := testFlatfsBs() 410 + if err != nil { 411 + b.Fatal(err) 412 + } 413 + defer cleanup() 414 + 415 + ncid, _, err := setupRepo(ctx, bs, false) 416 + if err != nil { 417 + b.Fatal(err) 418 + } 419 + 420 + head := ncid 421 + b.ResetTimer() 422 + for i := 0; i < b.N; i++ { 423 + 424 + rr, err := repo.OpenRepo(ctx, bs, head) 425 + if err != nil { 426 + b.Fatal(err) 427 + } 428 + 429 + if _, _, err := rr.CreateRecord(ctx, "app.bsky.feed.post", &appbsky.FeedPost{ 430 + Text: fmt.Sprintf("hey look its a tweet %s", time.Now()), 431 + }); err != nil { 432 + b.Fatal(err) 433 + } 434 + 435 + kmgr := 
&util.FakeKeyManager{} 436 + nroot, _, err := rr.Commit(ctx, kmgr.SignForUser) 437 + if err != nil { 438 + b.Fatal(err) 439 + } 440 + 441 + head = nroot 442 + } 443 + } 444 + 445 + /* NOTE(bnewbold): this depends on github.com/ipfs/go-bs-sqlite3, which rewrote git history (?) breaking the dependency tree. We can roll forward, but that will require broad dependency updates. So for now just removing this benchmark/perf test. 446 + func BenchmarkRepoWritesSqlite(b *testing.B) { 447 + ctx := context.TODO() 448 + 449 + bs, err := sqlbs.Open("file::memory:", sqlbs.Options{}) 450 + if err != nil { 451 + b.Fatal(err) 452 + } 453 + 454 + ncid, _, err := setupRepo(ctx, bs, false) 455 + if err != nil { 456 + b.Fatal(err) 457 + } 458 + 459 + head := ncid 460 + b.ResetTimer() 461 + for i := 0; i < b.N; i++ { 462 + 463 + rr, err := repo.OpenRepo(ctx, bs, head) 464 + if err != nil { 465 + b.Fatal(err) 466 + } 467 + 468 + if _, _, err := rr.CreateRecord(ctx, "app.bsky.feed.post", &appbsky.FeedPost{ 469 + Text: fmt.Sprintf("hey look its a tweet %s", time.Now()), 470 + }); err != nil { 471 + b.Fatal(err) 472 + } 473 + 474 + kmgr := &util.FakeKeyManager{} 475 + nroot, _, err := rr.Commit(ctx, kmgr.SignForUser) 476 + if err != nil { 477 + b.Fatal(err) 478 + } 479 + 480 + head = nroot 481 + } 482 + } 483 + */ 484 + 485 + func TestDuplicateBlockAcrossShards(ot *testing.T) { 486 + ctx := context.TODO() 487 + 488 + for fname, tf := range backends { 489 + ot.Run(fname, func(t *testing.T) { 490 + 491 + cs, cleanup, err := tf(t) 492 + if err != nil { 493 + t.Fatal(err) 494 + } 495 + defer cleanup() 496 + 497 + ds1, err := cs.NewDeltaSession(ctx, 1, nil) 498 + if err != nil { 499 + t.Fatal(err) 500 + } 501 + 502 + ds2, err := cs.NewDeltaSession(ctx, 2, nil) 503 + if err != nil { 504 + t.Fatal(err) 505 + } 506 + 507 + ds3, err := cs.NewDeltaSession(ctx, 3, nil) 508 + if err != nil { 509 + t.Fatal(err) 510 + } 511 + 512 + var cids []cid.Cid 513 + var revs []string 514 + for _, ds := range []carstore.BlockStorage{ds1, ds2, ds3} { 515 + ncid, rev, err := setupRepo(ctx, ds, true) 516 + if err != nil { 517 + t.Fatal(err) 518 + } 519 + 520 + if _, err := ds.CloseWithRoot(ctx, ncid, rev); err != nil { 521 + t.Fatal(err) 522 + } 523 + cids = append(cids, ncid) 524 + revs = append(revs, rev) 525 + } 526 + 527 + var recs []cid.Cid 528 + head := cids[1] 529 + rev := revs[1] 530 + for i := 0; i < 10; i++ { 531 + ds, err := cs.NewDeltaSession(ctx, 2, &rev) 532 + if err != nil { 533 + t.Fatal(err) 534 + } 535 + 536 + rr, err := repo.OpenRepo(ctx, ds, head) 537 + if err != nil { 538 + t.Fatal(err) 539 + } 540 + 541 + rc, _, err := rr.CreateRecord(ctx, "app.bsky.feed.post", &appbsky.FeedPost{ 542 + Text: fmt.Sprintf("hey look its a tweet %d", time.Now().UnixNano()), 543 + }) 544 + if err != nil { 545 + t.Fatal(err) 546 + } 547 + 548 + recs = append(recs, rc) 549 + 550 + kmgr := &util.FakeKeyManager{} 551 + nroot, nrev, err := rr.Commit(ctx, kmgr.SignForUser) 552 + if err != nil { 553 + t.Fatal(err) 554 + } 555 + 556 + rev = nrev 557 + 558 + if err := ds.CalcDiff(ctx, nil); err != nil { 559 + t.Fatal(err) 560 + } 561 + 562 + if _, err := ds.CloseWithRoot(ctx, nroot, rev); err != nil { 563 + t.Fatal(err) 564 + } 565 + 566 + head = nroot 567 + } 568 + 569 + // explicitly update the profile object 570 + { 571 + ds, err := cs.NewDeltaSession(ctx, 2, &rev) 572 + if err != nil { 573 + t.Fatal(err) 574 + } 575 + 576 + rr, err := repo.OpenRepo(ctx, ds, head) 577 + if err != nil { 578 + t.Fatal(err) 579 + } 580 + 581 + desc := "this is so 
unique" 582 + rc, err := rr.UpdateRecord(ctx, "app.bsky.actor.profile/self", &appbsky.ActorProfile{ 583 + Description: &desc, 584 + }) 585 + if err != nil { 586 + t.Fatal(err) 587 + } 588 + 589 + recs = append(recs, rc) 590 + 591 + kmgr := &util.FakeKeyManager{} 592 + nroot, nrev, err := rr.Commit(ctx, kmgr.SignForUser) 593 + if err != nil { 594 + t.Fatal(err) 595 + } 596 + 597 + rev = nrev 598 + 599 + if err := ds.CalcDiff(ctx, nil); err != nil { 600 + t.Fatal(err) 601 + } 602 + 603 + if _, err := ds.CloseWithRoot(ctx, nroot, rev); err != nil { 604 + t.Fatal(err) 605 + } 606 + 607 + head = nroot 608 + } 609 + 610 + buf := new(bytes.Buffer) 611 + if err := cs.ReadUserCar(ctx, 2, "", true, buf); err != nil { 612 + t.Fatal(err) 613 + } 614 + checkRepo(t, cs, buf, recs) 615 + }) 616 + } 617 + } 618 + 619 + type testWriter struct { 620 + t testing.TB 621 + } 622 + 623 + func (tw testWriter) Write(p []byte) (n int, err error) { 624 + tw.t.Log(string(p)) 625 + return len(p), nil 626 + } 627 + 628 + func slogForTest(t testing.TB) *slog.Logger { 629 + hopts := slog.HandlerOptions{ 630 + Level: slog.LevelDebug, 631 + } 632 + return slog.New(slog.NewTextHandler(&testWriter{t}, &hopts)) 633 + }