A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go
81
fork

Configure Feed

Select the types of activity you want to include in your feed.

change to transactions for database

+148 -111
+6
config-hold.example.yaml
··· 65 65 path: /var/lib/atcr-hold 66 66 # PDS signing key path. Defaults to {database.path}/signing.key. 67 67 key_path: "" 68 + # libSQL sync URL (libsql://...). Works with Turso cloud, Bunny DB, or self-hosted libsql-server. Leave empty for local-only SQLite. 69 + libsql_sync_url: "" 70 + # Auth token for libSQL sync. Required if libsql_sync_url is set. 71 + libsql_auth_token: "" 72 + # How often to sync with remote libSQL server. Default: 60s. 73 + libsql_sync_interval: 1m0s 68 74 # Admin panel settings. 69 75 admin: 70 76 # Enable the web-based admin panel for crew and storage management.
+9 -2
deploy/upcloud/configs/appview.yaml.tmpl
··· 13 13 default_hold_did: "{{.HoldDid}}" 14 14 oauth_key_path: "{{.BasePath}}/oauth/client.key" 15 15 client_name: Seamark 16 + test_mode: false 16 17 client_short_name: Seamark 17 18 registry_domains: 18 19 - "buoy.cr" ··· 27 28 cache_ttl: 15m0s 28 29 check_interval: 15m0s 29 30 jetstream: 30 - url: wss://jetstream2.us-west.bsky.network/subscribe 31 + urls: 32 + - wss://jetstream2.us-west.bsky.network/subscribe 33 + - wss://jetstream1.us-west.bsky.network/subscribe 34 + - wss://jetstream2.us-east.bsky.network/subscribe 35 + - wss://jetstream1.us-east.bsky.network/subscribe 31 36 backfill_enabled: true 32 - relay_endpoint: https://relay1.us-east.bsky.network 37 + relay_endpoints: 38 + - https://relay1.us-east.bsky.network 39 + - https://relay1.us-west.bsky.network 33 40 auth: 34 41 key_path: "{{.BasePath}}/auth/private-key.pem" 35 42 cert_path: "{{.BasePath}}/auth/private-key.crt"
+4
deploy/upcloud/configs/hold.yaml.tmpl
··· 17 17 addr: :8080 18 18 public_url: "https://{{.HoldDomain}}" 19 19 public: false 20 + test_mode: false 20 21 relay_endpoint: "" 21 22 read_timeout: 5m0s 22 23 write_timeout: 5m0s ··· 29 30 database: 30 31 path: "{{.BasePath}}" 31 32 key_path: "" 33 + libsql_sync_url: "" 34 + libsql_auth_token: "" 35 + libsql_sync_interval: 1m0s 32 36 admin: 33 37 enabled: true 34 38 quota:
+9 -17
pkg/appview/db/annotations.go
··· 1 1 package db 2 2 3 - import ( 4 - "database/sql" 5 - "time" 6 - ) 3 + import "time" 7 4 8 5 // GetRepositoryAnnotations retrieves all annotations for a repository 9 - func GetRepositoryAnnotations(db *sql.DB, did, repository string) (map[string]string, error) { 6 + func GetRepositoryAnnotations(db DBTX, did, repository string) (map[string]string, error) { 10 7 rows, err := db.Query(` 11 8 SELECT key, value 12 9 FROM repository_annotations ··· 30 27 } 31 28 32 29 // UpsertRepositoryAnnotations replaces all annotations for a repository 33 - // Only called when manifest has at least one non-empty annotation 34 - func UpsertRepositoryAnnotations(db *sql.DB, did, repository string, annotations map[string]string) error { 35 - tx, err := db.Begin() 36 - if err != nil { 37 - return err 38 - } 39 - defer tx.Rollback() 40 - 30 + // Only called when manifest has at least one non-empty annotation. 31 + // Atomicity is provided by the caller's transaction when used during backfill. 32 + func UpsertRepositoryAnnotations(db DBTX, did, repository string, annotations map[string]string) error { 41 33 // Delete existing annotations 42 - _, err = tx.Exec(` 34 + _, err := db.Exec(` 43 35 DELETE FROM repository_annotations 44 36 WHERE did = ? AND repository = ? 45 37 `, did, repository) ··· 48 40 } 49 41 50 42 // Insert new annotations 51 - stmt, err := tx.Prepare(` 43 + stmt, err := db.Prepare(` 52 44 INSERT INTO repository_annotations (did, repository, key, value, updated_at) 53 45 VALUES (?, ?, ?, ?, ?) 54 46 `) ··· 65 57 } 66 58 } 67 59 68 - return tx.Commit() 60 + return nil 69 61 } 70 62 71 63 // DeleteRepositoryAnnotations removes all annotations for a repository 72 - func DeleteRepositoryAnnotations(db *sql.DB, did, repository string) error { 64 + func DeleteRepositoryAnnotations(db DBTX, did, repository string) error { 73 65 _, err := db.Exec(` 74 66 DELETE FROM repository_annotations 75 67 WHERE did = ? AND repository = ?
+13
pkg/appview/db/dbtx.go
··· 1 + package db 2 + 3 + import "database/sql" 4 + 5 + // DBTX is an interface satisfied by both *sql.DB and *sql.Tx. 6 + // All query functions in this package accept DBTX to allow callers 7 + // to choose whether operations run in a transaction or standalone. 8 + type DBTX interface { 9 + Exec(query string, args ...any) (sql.Result, error) 10 + Query(query string, args ...any) (*sql.Rows, error) 11 + QueryRow(query string, args ...any) *sql.Row 12 + Prepare(query string) (*sql.Stmt, error) 13 + }
+2 -3
pkg/appview/db/delete.go
··· 2 2 3 3 import ( 4 4 "context" 5 - "database/sql" 6 5 "fmt" 7 6 "log/slog" 8 7 ) ··· 17 16 // 18 17 // This should be called AFTER remote cleanup (hold services, PDS records) 19 18 // since we need the OAuth tokens to authenticate those requests. 20 - func DeleteUserDataFull(db *sql.DB, oauthStore *OAuthStore, did string) error { 19 + func DeleteUserDataFull(db DBTX, oauthStore *OAuthStore, did string) error { 21 20 slog.Info("Starting full user data deletion", "did", did) 22 21 23 22 // 1. Delete non-cascading hold membership tables ··· 48 47 49 48 // deleteHoldMembershipData deletes non-cascading hold membership tables. 50 49 // These tables don't have foreign keys to the users table. 51 - func deleteHoldMembershipData(db *sql.DB, did string) error { 50 + func deleteHoldMembershipData(db DBTX, did string) error { 52 51 // Delete from hold_crew_approvals (where user is the approved member) 53 52 result, err := db.Exec(`DELETE FROM hold_crew_approvals WHERE user_did = ?`, did) 54 53 if err != nil {
+5 -5
pkg/appview/db/export.go
··· 75 75 76 76 // ExportUserData gathers all user data for GDPR export 77 77 // Only includes data we originate, not cached PDS data 78 - func ExportUserData(db *sql.DB, did string) (*UserDataExport, error) { 78 + func ExportUserData(db DBTX, did string) (*UserDataExport, error) { 79 79 export := &UserDataExport{ 80 80 ExportedAt: time.Now().UTC(), 81 81 ExportVersion: "1.0", ··· 128 128 } 129 129 130 130 // getDevicesForExport retrieves sanitized device records 131 - func getDevicesForExport(db *sql.DB, did string) ([]DeviceExport, error) { 131 + func getDevicesForExport(db DBTX, did string) ([]DeviceExport, error) { 132 132 rows, err := db.Query(` 133 133 SELECT id, name, ip_address, location, user_agent, created_at, last_used 134 134 FROM devices ··· 169 169 } 170 170 171 171 // getOAuthSessionsForExport retrieves sanitized OAuth session records 172 - func getOAuthSessionsForExport(db *sql.DB, did string) ([]OAuthSessionExport, error) { 172 + func getOAuthSessionsForExport(db DBTX, did string) ([]OAuthSessionExport, error) { 173 173 rows, err := db.Query(` 174 174 SELECT session_id, created_at, updated_at 175 175 FROM oauth_sessions ··· 199 199 } 200 200 201 201 // getUISessionsForExport retrieves sanitized UI session records 202 - func getUISessionsForExport(db *sql.DB, did string) ([]UISessionExport, error) { 202 + func getUISessionsForExport(db DBTX, did string) ([]UISessionExport, error) { 203 203 rows, err := db.Query(` 204 204 SELECT id, expires_at, created_at 205 205 FROM ui_sessions ··· 229 229 } 230 230 231 231 // getHoldMembershipsForExport retrieves hold approval and denial records 232 - func getHoldMembershipsForExport(db *sql.DB, did string) (HoldMembershipsExport, error) { 232 + func getHoldMembershipsForExport(db DBTX, did string) (HoldMembershipsExport, error) { 233 233 memberships := HoldMembershipsExport{ 234 234 Approvals: []HoldApprovalExport{}, 235 235 Denials: []HoldDenialExport{},
+9 -9
pkg/appview/db/hold_store.go
··· 30 30 31 31 // GetCaptainRecord retrieves a captain record from the cache 32 32 // Returns nil if not found (cache miss) 33 - func GetCaptainRecord(db *sql.DB, holdDID string) (*HoldCaptainRecord, error) { 33 + func GetCaptainRecord(db DBTX, holdDID string) (*HoldCaptainRecord, error) { 34 34 query := ` 35 35 SELECT hold_did, owner_did, public, allow_all_crew, 36 36 deployed_at, region, updated_at ··· 71 71 } 72 72 73 73 // UpsertCaptainRecord inserts or updates a captain record in the cache 74 - func UpsertCaptainRecord(db *sql.DB, record *HoldCaptainRecord) error { 74 + func UpsertCaptainRecord(db DBTX, record *HoldCaptainRecord) error { 75 75 query := ` 76 76 INSERT INTO hold_captain_records ( 77 77 hold_did, owner_did, public, allow_all_crew, ··· 104 104 } 105 105 106 106 // ListHoldDIDs returns all known hold DIDs from the cache 107 - func ListHoldDIDs(db *sql.DB) ([]string, error) { 107 + func ListHoldDIDs(db DBTX) ([]string, error) { 108 108 query := ` 109 109 SELECT hold_did 110 110 FROM hold_captain_records ··· 143 143 144 144 // GetCaptainRecordsForOwner retrieves all captain records where the user is the owner 145 145 // Used for GDPR export to find all holds owned by a user 146 - func GetCaptainRecordsForOwner(db *sql.DB, ownerDID string) ([]*HoldCaptainRecord, error) { 146 + func GetCaptainRecordsForOwner(db DBTX, ownerDID string) ([]*HoldCaptainRecord, error) { 147 147 query := ` 148 148 SELECT hold_did, owner_did, public, allow_all_crew, 149 149 deployed_at, region, updated_at ··· 198 198 } 199 199 200 200 // DeleteCaptainRecord removes a captain record from the cache 201 - func DeleteCaptainRecord(db *sql.DB, holdDID string) error { 201 + func DeleteCaptainRecord(db DBTX, holdDID string) error { 202 202 // Note: hold_crew_members doesn't have CASCADE, so delete crew first 203 203 _, err := db.Exec(`DELETE FROM hold_crew_members WHERE hold_did = ?`, holdDID) 204 204 if err != nil { ··· 226 226 } 227 227 228 228 // UpsertCrewMember inserts or updates a crew member record 229 - func UpsertCrewMember(db *sql.DB, member *CrewMember) error { 229 + func UpsertCrewMember(db DBTX, member *CrewMember) error { 230 230 query := ` 231 231 INSERT INTO hold_crew_members ( 232 232 hold_did, member_did, rkey, role, permissions, tier, added_at, updated_at ··· 257 257 } 258 258 259 259 // DeleteCrewMemberByRkey removes a crew member by rkey (for delete events from Jetstream) 260 - func DeleteCrewMemberByRkey(db *sql.DB, holdDID, rkey string) error { 260 + func DeleteCrewMemberByRkey(db DBTX, holdDID, rkey string) error { 261 261 _, err := db.Exec(`DELETE FROM hold_crew_members WHERE hold_did = ? AND rkey = ?`, holdDID, rkey) 262 262 if err != nil { 263 263 return fmt.Errorf("failed to delete crew member by rkey: %w", err) ··· 278 278 279 279 // GetAvailableHolds returns all holds available to a user, grouped by membership type 280 280 // Results are ordered: owner first, then crew, then eligible, then public 281 - func GetAvailableHolds(db *sql.DB, userDID string) ([]AvailableHold, error) { 281 + func GetAvailableHolds(db DBTX, userDID string) ([]AvailableHold, error) { 282 282 query := ` 283 283 SELECT 284 284 h.hold_did, ··· 352 352 } 353 353 354 354 // GetCrewMemberships returns all holds where a user is a crew member 355 - func GetCrewMemberships(db *sql.DB, memberDID string) ([]CrewMember, error) { 355 + func GetCrewMemberships(db DBTX, memberDID string) ([]CrewMember, error) { 356 356 query := ` 357 357 SELECT hold_did, member_did, rkey, role, permissions, tier, added_at, created_at, updated_at 358 358 FROM hold_crew_members
+60 -67
pkg/appview/db/queries.go
··· 55 55 56 56 // SearchRepositories searches for repositories matching the query across handles, DIDs, repositories, and annotations 57 57 // Returns RepoCardData (one per repository) instead of individual pushes/tags 58 - func SearchRepositories(db *sql.DB, query string, limit, offset int, currentUserDID string) ([]RepoCardData, int, error) { 58 + func SearchRepositories(db DBTX, query string, limit, offset int, currentUserDID string) ([]RepoCardData, int, error) { 59 59 // Escape LIKE wildcards so they're treated literally 60 60 query = escapeLikePattern(query) 61 61 ··· 181 181 } 182 182 183 183 // GetUserRepositories fetches all repositories for a user 184 - func GetUserRepositories(db *sql.DB, did string) ([]Repository, error) { 184 + func GetUserRepositories(db DBTX, did string) ([]Repository, error) { 185 185 // Get repository summary 186 186 rows, err := db.Query(` 187 187 SELECT ··· 310 310 311 311 // GetRepositoryMetadata retrieves metadata for a repository from annotations table 312 312 // Returns a map of annotation key -> value for easy access in templates and handlers 313 - func GetRepositoryMetadata(db *sql.DB, did string, repository string) (map[string]string, error) { 313 + func GetRepositoryMetadata(db DBTX, did string, repository string) (map[string]string, error) { 314 314 return GetRepositoryAnnotations(db, did, repository) 315 315 } 316 316 317 317 // GetUserByDID retrieves a user by DID 318 - func GetUserByDID(db *sql.DB, did string) (*User, error) { 318 + func GetUserByDID(db DBTX, did string) (*User, error) { 319 319 var user User 320 320 var avatar sql.NullString 321 321 err := db.QueryRow(` ··· 340 340 } 341 341 342 342 // GetUserByHandle retrieves a user by handle 343 - func GetUserByHandle(db *sql.DB, handle string) (*User, error) { 343 + func GetUserByHandle(db DBTX, handle string) (*User, error) { 344 344 var user User 345 345 var avatar sql.NullString 346 346 err := db.QueryRow(` ··· 365 365 } 366 366 367 367 // UpsertUser inserts or updates a user record 368 - func UpsertUser(db *sql.DB, user *User) error { 368 + func UpsertUser(db DBTX, user *User) error { 369 369 _, err := db.Exec(` 370 370 INSERT INTO users (did, handle, pds_endpoint, avatar, last_seen) 371 371 VALUES (?, ?, ?, ?, ?) ··· 380 380 381 381 // UpsertUserIgnoreAvatar inserts or updates a user record, but preserves existing avatar on update 382 382 // This is useful when avatar fetch fails, and we don't want to overwrite an existing avatar with empty string 383 - func UpsertUserIgnoreAvatar(db *sql.DB, user *User) error { 383 + func UpsertUserIgnoreAvatar(db DBTX, user *User) error { 384 384 _, err := db.Exec(` 385 385 INSERT INTO users (did, handle, pds_endpoint, avatar, last_seen) 386 386 VALUES (?, ?, ?, ?, ?) ··· 394 394 395 395 // UpdateUserLastSeen updates only the last_seen timestamp for a user 396 396 // This is more efficient than UpsertUser when only updating activity timestamp 397 - func UpdateUserLastSeen(db *sql.DB, did string) error { 397 + func UpdateUserLastSeen(db DBTX, did string) error { 398 398 _, err := db.Exec(` 399 399 UPDATE users SET last_seen = ? WHERE did = ? 400 400 `, time.Now(), did) ··· 403 403 404 404 // UpdateUserHandle updates a user's handle when an identity change event is received 405 405 // This is called when Jetstream receives an identity event indicating a handle change 406 - func UpdateUserHandle(db *sql.DB, did string, newHandle string) error { 406 + func UpdateUserHandle(db DBTX, did string, newHandle string) error { 407 407 _, err := db.Exec(` 408 408 UPDATE users SET handle = ?, last_seen = ? WHERE did = ? 409 409 `, newHandle, time.Now(), did) ··· 412 412 413 413 // UpdateUserAvatar updates a user's avatar URL when a profile change is detected 414 414 // This is called when Jetstream receives an app.bsky.actor.profile update 415 - func UpdateUserAvatar(db *sql.DB, did string, avatarURL string) error { 415 + func UpdateUserAvatar(db DBTX, did string, avatarURL string) error { 416 416 _, err := db.Exec(` 417 417 UPDATE users SET avatar = ?, last_seen = ? WHERE did = ? 418 418 `, avatarURL, time.Now(), did) ··· 420 420 } 421 421 422 422 // GetManifestDigestsForDID returns all manifest digests for a DID 423 - func GetManifestDigestsForDID(db *sql.DB, did string) ([]string, error) { 423 + func GetManifestDigestsForDID(db DBTX, did string) ([]string, error) { 424 424 rows, err := db.Query(` 425 425 SELECT digest FROM manifests WHERE did = ? 426 426 `, did) ··· 442 442 } 443 443 444 444 // DeleteManifestsNotInList deletes all manifests for a DID that are not in the provided list 445 - func DeleteManifestsNotInList(db *sql.DB, did string, keepDigests []string) error { 445 + func DeleteManifestsNotInList(db DBTX, did string, keepDigests []string) error { 446 446 if len(keepDigests) == 0 { 447 447 // No manifests to keep - delete all for this DID 448 448 _, err := db.Exec(`DELETE FROM manifests WHERE did = ?`, did) ··· 467 467 } 468 468 469 469 // GetTagsForDID returns all (repository, tag) pairs for a DID 470 - func GetTagsForDID(db *sql.DB, did string) ([]struct{ Repository, Tag string }, error) { 470 + func GetTagsForDID(db DBTX, did string) ([]struct{ Repository, Tag string }, error) { 471 471 rows, err := db.Query(` 472 472 SELECT repository, tag FROM tags WHERE did = ? 473 473 `, did) ··· 488 488 return tags, rows.Err() 489 489 } 490 490 491 - // DeleteTagsNotInList deletes all tags for a DID that are not in the provided list 492 - func DeleteTagsNotInList(db *sql.DB, did string, keepTags []struct{ Repository, Tag string }) error { 491 + // DeleteTagsNotInList deletes all tags for a DID that are not in the provided list. 492 + // Atomicity is provided by the caller's transaction when used during backfill. 493 + func DeleteTagsNotInList(db DBTX, did string, keepTags []struct{ Repository, Tag string }) error { 493 494 if len(keepTags) == 0 { 494 495 // No tags to keep - delete all for this DID 495 496 _, err := db.Exec(`DELETE FROM tags WHERE did = ?`, did) 496 497 return err 497 498 } 498 499 499 - // For tags, we need to check (repository, tag) pairs 500 - // Build a DELETE query that excludes the pairs we want to keep 501 - tx, err := db.Begin() 502 - if err != nil { 503 - return err 504 - } 505 - defer tx.Rollback() 506 - 507 500 // First, get all current tags 508 - rows, err := tx.Query(`SELECT id, repository, tag FROM tags WHERE did = ?`, did) 501 + rows, err := db.Query(`SELECT id, repository, tag FROM tags WHERE did = ?`, did) 509 502 if err != nil { 510 503 return err 511 504 } ··· 536 529 537 530 // Delete tags not in keep list 538 531 for _, id := range toDelete { 539 - if _, err := tx.Exec(`DELETE FROM tags WHERE id = ?`, id); err != nil { 532 + if _, err := db.Exec(`DELETE FROM tags WHERE id = ?`, id); err != nil { 540 533 return err 541 534 } 542 535 } 543 536 544 - return tx.Commit() 537 + return nil 545 538 } 546 539 547 540 // InsertManifest inserts or updates a manifest record 548 541 // Uses UPSERT to update core metadata if manifest already exists 549 542 // Returns the manifest ID (works correctly for both insert and update) 550 543 // Note: Annotations are stored separately in repository_annotations table 551 - func InsertManifest(db *sql.DB, manifest *Manifest) (int64, error) { 544 + func InsertManifest(db DBTX, manifest *Manifest) (int64, error) { 552 545 _, err := db.Exec(` 553 546 INSERT INTO manifests 554 547 (did, repository, digest, hold_endpoint, schema_version, media_type, ··· 584 577 } 585 578 586 579 // InsertLayer inserts a new layer record 587 - func InsertLayer(db *sql.DB, layer *Layer) error { 580 + func InsertLayer(db DBTX, layer *Layer) error { 588 581 _, err := db.Exec(` 589 582 INSERT INTO layers (manifest_id, digest, size, media_type, layer_index) 590 583 VALUES (?, ?, ?, ?, ?) ··· 593 586 } 594 587 595 588 // UpsertTag inserts or updates a tag record 596 - func UpsertTag(db *sql.DB, tag *Tag) error { 589 + func UpsertTag(db DBTX, tag *Tag) error { 597 590 _, err := db.Exec(` 598 591 INSERT INTO tags (did, repository, tag, digest, created_at) 599 592 VALUES (?, ?, ?, ?, ?) ··· 605 598 } 606 599 607 600 // DeleteTag deletes a tag record 608 - func DeleteTag(db *sql.DB, did, repository, tag string) error { 601 + func DeleteTag(db DBTX, did, repository, tag string) error { 609 602 _, err := db.Exec(` 610 603 DELETE FROM tags WHERE did = ? AND repository = ? AND tag = ? 611 604 `, did, repository, tag) ··· 616 609 // Only multi-arch tags (manifest lists) have platform info in manifest_references 617 610 // Single-arch tags will have empty Platforms slice (platform is obvious for single-arch) 618 611 // Attestation references (unknown/unknown platforms) are filtered out but tracked via HasAttestations 619 - func GetTagsWithPlatforms(db *sql.DB, did, repository string) ([]TagWithPlatforms, error) { 612 + func GetTagsWithPlatforms(db DBTX, did, repository string) ([]TagWithPlatforms, error) { 620 613 rows, err := db.Query(` 621 614 SELECT 622 615 t.id, ··· 700 693 701 694 // DeleteManifest deletes a manifest and its associated layers 702 695 // If repository is empty, deletes all manifests matching did and digest 703 - func DeleteManifest(db *sql.DB, did, repository, digest string) error { 696 + func DeleteManifest(db DBTX, did, repository, digest string) error { 704 697 var err error 705 698 if repository == "" { 706 699 // Delete by DID + digest only (used when repository is unknown, e.g., Jetstream DELETE events) ··· 718 711 // 719 712 // Due to ON DELETE CASCADE in the schema, deleting from users will automatically 720 713 // cascade to: manifests, tags, layers, references, annotations, stars, repo_pages, etc. 721 - func DeleteUserData(db *sql.DB, did string) error { 714 + func DeleteUserData(db DBTX, did string) error { 722 715 result, err := db.Exec(`DELETE FROM users WHERE did = ?`, did) 723 716 if err != nil { 724 717 return fmt.Errorf("failed to delete user: %w", err) ··· 735 728 736 729 // GetManifest fetches a single manifest by digest 737 730 // Note: Annotations are stored separately in repository_annotations table 738 - func GetManifest(db *sql.DB, digest string) (*Manifest, error) { 731 + func GetManifest(db DBTX, digest string) (*Manifest, error) { 739 732 var m Manifest 740 733 741 734 err := db.QueryRow(` ··· 756 749 757 750 // GetNewestManifestForRepo returns the newest manifest for a specific repository 758 751 // Used by backfill to ensure annotations come from the most recent manifest 759 - func GetNewestManifestForRepo(db *sql.DB, did, repository string) (*Manifest, error) { 752 + func GetNewestManifestForRepo(db DBTX, did, repository string) (*Manifest, error) { 760 753 var m Manifest 761 754 err := db.QueryRow(` 762 755 SELECT id, did, repository, digest, hold_endpoint, schema_version, media_type, ··· 779 772 // GetLatestHoldDIDForRepo returns the hold DID from the most recent manifest for a repository 780 773 // Returns empty string if no manifests exist (e.g., first push) 781 774 // This is used instead of the in-memory cache to determine which hold to use for blob operations 782 - func GetLatestHoldDIDForRepo(db *sql.DB, did, repository string) (string, error) { 775 + func GetLatestHoldDIDForRepo(db DBTX, did, repository string) (string, error) { 783 776 var holdDID string 784 777 err := db.QueryRow(` 785 778 SELECT hold_endpoint ··· 802 795 803 796 // GetRepositoriesForDID returns all unique repository names for a DID 804 797 // Used by backfill to reconcile annotations for all repositories 805 - func GetRepositoriesForDID(db *sql.DB, did string) ([]string, error) { 798 + func GetRepositoriesForDID(db DBTX, did string) ([]string, error) { 806 799 rows, err := db.Query(` 807 800 SELECT DISTINCT repository 808 801 FROM manifests ··· 825 818 } 826 819 827 820 // GetLayersForManifest fetches all layers for a manifest 828 - func GetLayersForManifest(db *sql.DB, manifestID int64) ([]Layer, error) { 821 + func GetLayersForManifest(db DBTX, manifestID int64) ([]Layer, error) { 829 822 rows, err := db.Query(` 830 823 SELECT manifest_id, digest, size, media_type, layer_index 831 824 FROM layers ··· 851 844 } 852 845 853 846 // InsertManifestReference inserts a new manifest reference record (for manifest lists/indexes) 854 - func InsertManifestReference(db *sql.DB, ref *ManifestReference) error { 847 + func InsertManifestReference(db DBTX, ref *ManifestReference) error { 855 848 _, err := db.Exec(` 856 849 INSERT INTO manifest_references (manifest_id, digest, size, media_type, 857 850 platform_architecture, platform_os, ··· 866 859 } 867 860 868 861 // GetManifestReferencesForManifest fetches all manifest references for a manifest list/index 869 - func GetManifestReferencesForManifest(db *sql.DB, manifestID int64) ([]ManifestReference, error) { 862 + func GetManifestReferencesForManifest(db DBTX, manifestID int64) ([]ManifestReference, error) { 870 863 rows, err := db.Query(` 871 864 SELECT manifest_id, digest, size, media_type, 872 865 platform_architecture, platform_os, platform_variant, platform_os_version, ··· 914 907 // GetTopLevelManifests returns only manifest lists and orphaned single-arch manifests 915 908 // Filters out platform-specific manifests that are referenced by manifest lists 916 909 // Note: Annotations are stored separately in repository_annotations table - use GetRepositoryMetadata to fetch them 917 - func GetTopLevelManifests(db *sql.DB, did, repository string, limit, offset int) ([]ManifestWithMetadata, error) { 910 + func GetTopLevelManifests(db DBTX, did, repository string, limit, offset int) ([]ManifestWithMetadata, error) { 918 911 rows, err := db.Query(` 919 912 WITH manifest_list_children AS ( 920 913 -- Get all digests that are children of manifest lists ··· 1047 1040 1048 1041 // GetManifestDetail returns a manifest with full platform details and tags 1049 1042 // Note: Annotations are stored separately in repository_annotations table - use GetRepositoryMetadata to fetch them 1050 - func GetManifestDetail(db *sql.DB, did, repository, digest string) (*ManifestWithMetadata, error) { 1043 + func GetManifestDetail(db DBTX, did, repository, digest string) (*ManifestWithMetadata, error) { 1051 1044 // First, get the manifest and its tags 1052 1045 var m ManifestWithMetadata 1053 1046 var tags, configDigest sql.NullString ··· 1152 1145 } 1153 1146 1154 1147 // GetFirehoseCursor retrieves the current firehose cursor 1155 - func GetFirehoseCursor(db *sql.DB) (int64, error) { 1148 + func GetFirehoseCursor(db DBTX) (int64, error) { 1156 1149 var cursor int64 1157 1150 err := db.QueryRow("SELECT cursor FROM firehose_cursor WHERE id = 1").Scan(&cursor) 1158 1151 if err == sql.ErrNoRows { ··· 1162 1155 } 1163 1156 1164 1157 // UpdateFirehoseCursor updates the firehose cursor 1165 - func UpdateFirehoseCursor(db *sql.DB, cursor int64) error { 1158 + func UpdateFirehoseCursor(db DBTX, cursor int64) error { 1166 1159 _, err := db.Exec(` 1167 1160 INSERT INTO firehose_cursor (id, cursor, updated_at) 1168 1161 VALUES (1, ?, datetime('now')) ··· 1174 1167 } 1175 1168 1176 1169 // IsManifestTagged checks if a manifest has any tags 1177 - func IsManifestTagged(db *sql.DB, did, repository, digest string) (bool, error) { 1170 + func IsManifestTagged(db DBTX, did, repository, digest string) (bool, error) { 1178 1171 var count int 1179 1172 err := db.QueryRow(` 1180 1173 SELECT COUNT(*) FROM tags ··· 1189 1182 } 1190 1183 1191 1184 // GetManifestTags retrieves all tags for a manifest 1192 - func GetManifestTags(db *sql.DB, did, repository, digest string) ([]string, error) { 1185 + func GetManifestTags(db DBTX, did, repository, digest string) ([]string, error) { 1193 1186 rows, err := db.Query(` 1194 1187 SELECT tag FROM tags 1195 1188 WHERE did = ? AND repository = ? AND digest = ? ··· 1225 1218 } 1226 1219 1227 1220 // GetBackfillState retrieves the backfill state 1228 - func GetBackfillState(db *sql.DB) (*BackfillState, error) { 1221 + func GetBackfillState(db DBTX) (*BackfillState, error) { 1229 1222 var state BackfillState 1230 1223 var updatedAtStr string 1231 1224 ··· 1263 1256 } 1264 1257 1265 1258 // UpsertBackfillState updates or creates backfill state 1266 - func UpsertBackfillState(db *sql.DB, state *BackfillState) error { 1259 + func UpsertBackfillState(db DBTX, state *BackfillState) error { 1267 1260 _, err := db.Exec(` 1268 1261 INSERT INTO backfill_state (id, start_cursor, current_cursor, completed, updated_at) 1269 1262 VALUES (1, ?, ?, ?, datetime('now')) ··· 1277 1270 } 1278 1271 1279 1272 // UpdateBackfillCursor updates just the current cursor position 1280 - func UpdateBackfillCursor(db *sql.DB, cursor int64) error { 1273 + func UpdateBackfillCursor(db DBTX, cursor int64) error { 1281 1274 _, err := db.Exec(` 1282 1275 UPDATE backfill_state 1283 1276 SET current_cursor = ?, updated_at = datetime('now') ··· 1287 1280 } 1288 1281 1289 1282 // MarkBackfillCompleted marks the backfill as completed 1290 - func MarkBackfillCompleted(db *sql.DB) error { 1283 + func MarkBackfillCompleted(db DBTX) error { 1291 1284 _, err := db.Exec(` 1292 1285 UPDATE backfill_state 1293 1286 SET completed = 1, updated_at = datetime('now') ··· 1297 1290 } 1298 1291 1299 1292 // GetRepository fetches a specific repository for a user 1300 - func GetRepository(db *sql.DB, did, repository string) (*Repository, error) { 1293 + func GetRepository(db DBTX, did, repository string) (*Repository, error) { 1301 1294 // Get repository summary 1302 1295 var r Repository 1303 1296 r.Name = repository ··· 1412 1405 } 1413 1406 1414 1407 // GetRepositoryStats fetches stats for a repository 1415 - func GetRepositoryStats(db *sql.DB, did, repository string) (*RepositoryStats, error) { 1408 + func GetRepositoryStats(db DBTX, did, repository string) (*RepositoryStats, error) { 1416 1409 var stats RepositoryStats 1417 1410 var lastPullStr, lastPushStr sql.NullString 1418 1411 ··· 1463 1456 1464 1457 // UpsertRepositoryStats inserts or updates repository stats 1465 1458 // Note: star_count is calculated dynamically from the stars table, not stored here 1466 - func UpsertRepositoryStats(db *sql.DB, stats *RepositoryStats) error { 1459 + func UpsertRepositoryStats(db DBTX, stats *RepositoryStats) error { 1467 1460 _, err := db.Exec(` 1468 1461 INSERT INTO repository_stats (did, repository, pull_count, last_pull, push_count, last_push) 1469 1462 VALUES (?, ?, ?, ?, ?, ?) ··· 1477 1470 } 1478 1471 1479 1472 // UpsertStar inserts or updates a star record (idempotent) 1480 - func UpsertStar(db *sql.DB, starrerDID, ownerDID, repository string, createdAt time.Time) error { 1473 + func UpsertStar(db DBTX, starrerDID, ownerDID, repository string, createdAt time.Time) error { 1481 1474 _, err := db.Exec(` 1482 1475 INSERT INTO stars (starrer_did, owner_did, repository, created_at) 1483 1476 VALUES (?, ?, ?, ?) ··· 1488 1481 } 1489 1482 1490 1483 // DeleteStar deletes a star record 1491 - func DeleteStar(db *sql.DB, starrerDID, ownerDID, repository string) error { 1484 + func DeleteStar(db DBTX, starrerDID, ownerDID, repository string) error { 1492 1485 _, err := db.Exec(` 1493 1486 DELETE FROM stars 1494 1487 WHERE starrer_did = ? AND owner_did = ? AND repository = ? ··· 1497 1490 } 1498 1491 1499 1492 // RebuildStarCount rebuilds the star count for a specific repository from the stars table 1500 - func RebuildStarCount(db *sql.DB, ownerDID, repository string) error { 1493 + func RebuildStarCount(db DBTX, ownerDID, repository string) error { 1501 1494 _, err := db.Exec(` 1502 1495 INSERT INTO repository_stats (did, repository, star_count) 1503 1496 VALUES (?, ?, ( ··· 1515 1508 1516 1509 // GetStarsForDID returns all stars created by a specific DID (for backfill reconciliation) 1517 1510 // Returns a map of (ownerDID, repository) -> createdAt 1518 - func GetStarsForDID(db *sql.DB, starrerDID string) (map[string]time.Time, error) { 1511 + func GetStarsForDID(db DBTX, starrerDID string) (map[string]time.Time, error) { 1519 1512 rows, err := db.Query(` 1520 1513 SELECT owner_did, repository, created_at 1521 1514 FROM stars ··· 1542 1535 1543 1536 // CleanupOrphanedTags removes tags whose manifest digest no longer exists 1544 1537 // This handles cases where manifests were deleted but tags pointing to them remain 1545 - func CleanupOrphanedTags(db *sql.DB, did string) error { 1538 + func CleanupOrphanedTags(db DBTX, did string) error { 1546 1539 _, err := db.Exec(` 1547 1540 DELETE FROM tags 1548 1541 WHERE did = ? ··· 1557 1550 1558 1551 // DeleteStarsNotInList deletes stars from the database that are not in the provided list 1559 1552 // This is used during backfill reconciliation to remove stars that no longer exist on PDS 1560 - func DeleteStarsNotInList(db *sql.DB, starrerDID string, foundStars map[string]time.Time) error { 1553 + func DeleteStarsNotInList(db DBTX, starrerDID string, foundStars map[string]time.Time) error { 1561 1554 // Get current stars in DB 1562 1555 currentStars, err := GetStarsForDID(db, starrerDID) 1563 1556 if err != nil { ··· 1608 1601 // HoldDIDDB wraps a sql.DB and implements the HoldDIDLookup interface for middleware 1609 1602 // This is a minimal wrapper that only provides hold DID lookups for blob routing 1610 1603 type HoldDIDDB struct { 1611 - db *sql.DB 1604 + db DBTX 1612 1605 } 1613 1606 1614 1607 // NewHoldDIDDB creates a new hold DID database wrapper 1615 - func NewHoldDIDDB(db *sql.DB) *HoldDIDDB { 1608 + func NewHoldDIDDB(db DBTX) *HoldDIDDB { 1616 1609 return &HoldDIDDB{db: db} 1617 1610 } 1618 1611 ··· 1632 1625 ) 1633 1626 1634 1627 // GetRepoCards fetches repository cards with full data including Tag, Digest, and LastUpdated 1635 - func GetRepoCards(db *sql.DB, limit int, currentUserDID string, sortOrder RepoCardSortOrder) ([]RepoCardData, error) { 1628 + func GetRepoCards(db DBTX, limit int, currentUserDID string, sortOrder RepoCardSortOrder) ([]RepoCardData, error) { 1636 1629 // Build ORDER BY clause based on sort order 1637 1630 var orderBy string 1638 1631 switch sortOrder { ··· 1713 1706 } 1714 1707 1715 1708 // GetUserRepoCards fetches repository cards for a specific user with full data 1716 - func GetUserRepoCards(db *sql.DB, userDID string, currentUserDID string) ([]RepoCardData, error) { 1709 + func GetUserRepoCards(db DBTX, userDID string, currentUserDID string) ([]RepoCardData, error) { 1717 1710 query := ` 1718 1711 WITH latest_manifests AS ( 1719 1712 SELECT did, repository, MAX(id) as latest_id ··· 1795 1788 } 1796 1789 1797 1790 // UpsertRepoPage inserts or updates a repo page record 1798 - func UpsertRepoPage(db *sql.DB, did, repository, description, avatarCID string, createdAt, updatedAt time.Time) error { 1791 + func UpsertRepoPage(db DBTX, did, repository, description, avatarCID string, createdAt, updatedAt time.Time) error { 1799 1792 _, err := db.Exec(` 1800 1793 INSERT INTO repo_pages (did, repository, description, avatar_cid, created_at, updated_at) 1801 1794 VALUES (?, ?, ?, ?, ?, ?) ··· 1808 1801 } 1809 1802 1810 1803 // GetRepoPage retrieves a repo page record 1811 - func GetRepoPage(db *sql.DB, did, repository string) (*RepoPage, error) { 1804 + func GetRepoPage(db DBTX, did, repository string) (*RepoPage, error) { 1812 1805 var rp RepoPage 1813 1806 err := db.QueryRow(` 1814 1807 SELECT did, repository, description, avatar_cid, created_at, updated_at ··· 1822 1815 } 1823 1816 1824 1817 // DeleteRepoPage deletes a repo page record 1825 - func DeleteRepoPage(db *sql.DB, did, repository string) error { 1818 + func DeleteRepoPage(db DBTX, did, repository string) error { 1826 1819 _, err := db.Exec(` 1827 1820 DELETE FROM repo_pages WHERE did = ? AND repository = ? 1828 1821 `, did, repository) ··· 1830 1823 } 1831 1824 1832 1825 // GetRepoPagesByDID returns all repo pages for a DID 1833 - func GetRepoPagesByDID(db *sql.DB, did string) ([]RepoPage, error) { 1826 + func GetRepoPagesByDID(db DBTX, did string) ([]RepoPage, error) { 1834 1827 rows, err := db.Query(` 1835 1828 SELECT did, repository, description, avatar_cid, created_at, updated_at 1836 1829 FROM repo_pages
+29 -5
pkg/appview/jetstream/backfill.go
··· 181 181 return nil 182 182 } 183 183 184 - // backfillRepo backfills all records for a single repo/DID 184 + // backfillRepo backfills all records for a single repo/DID. 185 + // Per-record processing is wrapped in a single SQL transaction to batch writes 186 + // (one commit per repo instead of per-statement). 185 187 func (b *BackfillWorker) backfillRepo(ctx context.Context, did, collection string) (int, error) { 186 188 // Resolve DID to get user's PDS endpoint 187 189 pdsEndpoint, err := atproto.ResolveDIDToPDS(ctx, did) ··· 192 194 // Create a client for this user's PDS with the user's DID 193 195 // This allows GetRecord to work properly with the repo parameter 194 196 pdsClient := atproto.NewClient(pdsEndpoint, did, "") 197 + 198 + // Begin transaction for per-record processing (batches all writes into one commit) 199 + tx, err := b.db.Begin() 200 + if err != nil { 201 + return 0, fmt.Errorf("failed to begin transaction: %w", err) 202 + } 203 + defer tx.Rollback() 204 + 205 + // Create a transactional processor — all DB writes go through this tx 206 + txProcessor := NewProcessor(tx, false, b.processor.statsCache) 195 207 196 208 var recordCursor string 197 209 recordCount := 0 ··· 235 247 } 236 248 } 237 249 238 - if err := b.processRecord(ctx, did, collection, &record); err != nil { 250 + if err := b.processRecordWith(ctx, txProcessor, did, collection, &record); err != nil { 239 251 slog.Warn("Backfill failed to process record", "uri", record.URI, "error", err) 240 252 continue 241 253 } ··· 250 262 recordCursor = cursor 251 263 } 252 264 265 + // Commit all per-record writes in one batch 266 + if err := tx.Commit(); err != nil { 267 + return 0, fmt.Errorf("failed to commit transaction: %w", err) 268 + } 269 + 270 + // Reconciliation runs outside the transaction (involves network I/O and fewer writes) 271 + 253 272 // Reconcile deletions - remove records from DB that no longer exist on PDS 254 273 if err := b.reconcileDeletions(did, collection, foundManifestDigests, foundTags, foundStars); err != nil { 255 274 slog.Warn("Backfill failed to reconcile deletions", "did", did, "error", err) ··· 334 353 return nil 335 354 } 336 355 337 - // processRecord processes a single record using the unified ProcessRecord method. 338 - // This ensures consistent handling (validation, user creation) between Worker and Backfill. 356 + // processRecord processes a single record using the default processor. 339 357 func (b *BackfillWorker) processRecord(ctx context.Context, did, collection string, record *atproto.Record) error { 358 + return b.processRecordWith(ctx, b.processor, did, collection, record) 359 + } 360 + 361 + // processRecordWith processes a single record using the given processor. 362 + // This allows backfillRepo to use a transactional processor while other callers use the default. 363 + func (b *BackfillWorker) processRecordWith(ctx context.Context, proc *Processor, did, collection string, record *atproto.Record) error { 340 364 rkey := extractRkeyFromURI(record.URI) 341 365 342 366 // For sailor profile collection, we need to pass the queryCaptainFn ··· 346 370 queryCaptainFn = b.queryCaptainRecordWrapper 347 371 } 348 372 349 - return b.processor.ProcessRecord(ctx, did, collection, rkey, record.Value, false, queryCaptainFn) 373 + return proc.ProcessRecord(ctx, did, collection, rkey, record.Value, false, queryCaptainFn) 350 374 } 351 375 352 376 // queryCaptainRecordWrapper wraps queryCaptainRecord with backfill-specific logic
+2 -3
pkg/appview/jetstream/processor.go
··· 2 2 3 3 import ( 4 4 "context" 5 - "database/sql" 6 5 "encoding/json" 7 6 "fmt" 8 7 "log/slog" ··· 18 17 // Processor handles shared database operations for both Worker (live) and Backfill (sync) 19 18 // This eliminates code duplication between the two data ingestion paths 20 19 type Processor struct { 21 - db *sql.DB 20 + db db.DBTX 22 21 userCache *UserCache // Optional - enabled for Worker, disabled for Backfill 23 22 statsCache *StatsCache // In-memory cache for per-hold stats aggregation 24 23 useCache bool ··· 28 27 // NewProcessor creates a new shared processor 29 28 // useCache: true for Worker (live streaming), false for Backfill (batch processing) 30 29 // statsCache: shared stats cache for aggregating across holds (nil to skip stats processing) 31 - func NewProcessor(database *sql.DB, useCache bool, statsCache *StatsCache) *Processor { 30 + func NewProcessor(database db.DBTX, useCache bool, statsCache *StatsCache) *Processor { 32 31 // Create lexicon catalog for debug validation logging 33 32 dir := identity.DefaultDirectory() 34 33 catalog := lexicon.NewResolvingCatalog()