A container registry that uses the AT Protocol for manifest storage and S3 for blob storage.
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

change to transactions for database

+148 -111
+6
config-hold.example.yaml
··· 65 65 path: /var/lib/atcr-hold 66 66 # PDS signing key path. Defaults to {database.path}/signing.key. 67 67 key_path: "" 68 + # libSQL sync URL (libsql://...). Works with Turso cloud, Bunny DB, or self-hosted libsql-server. Leave empty for local-only SQLite. 69 + libsql_sync_url: "" 70 + # Auth token for libSQL sync. Required if libsql_sync_url is set. 71 + libsql_auth_token: "" 72 + # How often to sync with remote libSQL server. Default: 60s. 73 + libsql_sync_interval: 1m0s 68 74 # Admin panel settings. 69 75 admin: 70 76 # Enable the web-based admin panel for crew and storage management.
+9 -2
deploy/upcloud/configs/appview.yaml.tmpl
··· 13 13 default_hold_did: "{{.HoldDid}}" 14 14 oauth_key_path: "{{.BasePath}}/oauth/client.key" 15 15 client_name: Seamark 16 + test_mode: false 16 17 client_short_name: Seamark 17 18 registry_domains: 18 19 - "buoy.cr" ··· 27 28 cache_ttl: 15m0s 28 29 check_interval: 15m0s 29 30 jetstream: 30 - url: wss://jetstream2.us-west.bsky.network/subscribe 31 + urls: 32 + - wss://jetstream2.us-west.bsky.network/subscribe 33 + - wss://jetstream1.us-west.bsky.network/subscribe 34 + - wss://jetstream2.us-east.bsky.network/subscribe 35 + - wss://jetstream1.us-east.bsky.network/subscribe 31 36 backfill_enabled: true 32 - relay_endpoint: https://relay1.us-east.bsky.network 37 + relay_endpoints: 38 + - https://relay1.us-east.bsky.network 39 + - https://relay1.us-west.bsky.network 33 40 auth: 34 41 key_path: "{{.BasePath}}/auth/private-key.pem" 35 42 cert_path: "{{.BasePath}}/auth/private-key.crt"
+4
deploy/upcloud/configs/hold.yaml.tmpl
··· 17 17 addr: :8080 18 18 public_url: "https://{{.HoldDomain}}" 19 19 public: false 20 + test_mode: false 20 21 relay_endpoint: "" 21 22 read_timeout: 5m0s 22 23 write_timeout: 5m0s ··· 29 30 database: 30 31 path: "{{.BasePath}}" 31 32 key_path: "" 33 + libsql_sync_url: "" 34 + libsql_auth_token: "" 35 + libsql_sync_interval: 1m0s 32 36 admin: 33 37 enabled: true 34 38 quota:
+9 -17
pkg/appview/db/annotations.go
··· 1 1 package db 2 2 3 - import ( 4 - "database/sql" 5 - "time" 6 - ) 3 + import "time" 7 4 8 5 // GetRepositoryAnnotations retrieves all annotations for a repository 9 - func GetRepositoryAnnotations(db *sql.DB, did, repository string) (map[string]string, error) { 6 + func GetRepositoryAnnotations(db DBTX, did, repository string) (map[string]string, error) { 10 7 rows, err := db.Query(` 11 8 SELECT key, value 12 9 FROM repository_annotations ··· 30 27 } 31 28 32 29 // UpsertRepositoryAnnotations replaces all annotations for a repository 33 - // Only called when manifest has at least one non-empty annotation 34 - func UpsertRepositoryAnnotations(db *sql.DB, did, repository string, annotations map[string]string) error { 35 - tx, err := db.Begin() 36 - if err != nil { 37 - return err 38 - } 39 - defer tx.Rollback() 40 - 30 + // Only called when manifest has at least one non-empty annotation. 31 + // Atomicity is provided by the caller's transaction when used during backfill. 32 + func UpsertRepositoryAnnotations(db DBTX, did, repository string, annotations map[string]string) error { 41 33 // Delete existing annotations 42 - _, err = tx.Exec(` 34 + _, err := db.Exec(` 43 35 DELETE FROM repository_annotations 44 36 WHERE did = ? AND repository = ? 45 37 `, did, repository) ··· 48 40 } 49 41 50 42 // Insert new annotations 51 - stmt, err := tx.Prepare(` 43 + stmt, err := db.Prepare(` 52 44 INSERT INTO repository_annotations (did, repository, key, value, updated_at) 53 45 VALUES (?, ?, ?, ?, ?) 54 46 `) ··· 65 57 } 66 58 } 67 59 68 - return tx.Commit() 60 + return nil 69 61 } 70 62 71 63 // DeleteRepositoryAnnotations removes all annotations for a repository 72 - func DeleteRepositoryAnnotations(db *sql.DB, did, repository string) error { 64 + func DeleteRepositoryAnnotations(db DBTX, did, repository string) error { 73 65 _, err := db.Exec(` 74 66 DELETE FROM repository_annotations 75 67 WHERE did = ? AND repository = ?
+13
pkg/appview/db/dbtx.go
··· 1 + package db 2 + 3 + import "database/sql" 4 + 5 + // DBTX is an interface satisfied by both *sql.DB and *sql.Tx. 6 + // All query functions in this package accept DBTX to allow callers 7 + // to choose whether operations run in a transaction or standalone. 8 + type DBTX interface { 9 + Exec(query string, args ...any) (sql.Result, error) 10 + Query(query string, args ...any) (*sql.Rows, error) 11 + QueryRow(query string, args ...any) *sql.Row 12 + Prepare(query string) (*sql.Stmt, error) 13 + }
+2 -3
pkg/appview/db/delete.go
··· 2 2 3 3 import ( 4 4 "context" 5 - "database/sql" 6 5 "fmt" 7 6 "log/slog" 8 7 ) ··· 17 16 // 18 17 // This should be called AFTER remote cleanup (hold services, PDS records) 19 18 // since we need the OAuth tokens to authenticate those requests. 20 - func DeleteUserDataFull(db *sql.DB, oauthStore *OAuthStore, did string) error { 19 + func DeleteUserDataFull(db DBTX, oauthStore *OAuthStore, did string) error { 21 20 slog.Info("Starting full user data deletion", "did", did) 22 21 23 22 // 1. Delete non-cascading hold membership tables ··· 48 47 49 48 // deleteHoldMembershipData deletes non-cascading hold membership tables. 50 49 // These tables don't have foreign keys to the users table. 51 - func deleteHoldMembershipData(db *sql.DB, did string) error { 50 + func deleteHoldMembershipData(db DBTX, did string) error { 52 51 // Delete from hold_crew_approvals (where user is the approved member) 53 52 result, err := db.Exec(`DELETE FROM hold_crew_approvals WHERE user_did = ?`, did) 54 53 if err != nil {
+5 -5
pkg/appview/db/export.go
··· 75 75 76 76 // ExportUserData gathers all user data for GDPR export 77 77 // Only includes data we originate, not cached PDS data 78 - func ExportUserData(db *sql.DB, did string) (*UserDataExport, error) { 78 + func ExportUserData(db DBTX, did string) (*UserDataExport, error) { 79 79 export := &UserDataExport{ 80 80 ExportedAt: time.Now().UTC(), 81 81 ExportVersion: "1.0", ··· 128 128 } 129 129 130 130 // getDevicesForExport retrieves sanitized device records 131 - func getDevicesForExport(db *sql.DB, did string) ([]DeviceExport, error) { 131 + func getDevicesForExport(db DBTX, did string) ([]DeviceExport, error) { 132 132 rows, err := db.Query(` 133 133 SELECT id, name, ip_address, location, user_agent, created_at, last_used 134 134 FROM devices ··· 169 169 } 170 170 171 171 // getOAuthSessionsForExport retrieves sanitized OAuth session records 172 - func getOAuthSessionsForExport(db *sql.DB, did string) ([]OAuthSessionExport, error) { 172 + func getOAuthSessionsForExport(db DBTX, did string) ([]OAuthSessionExport, error) { 173 173 rows, err := db.Query(` 174 174 SELECT session_id, created_at, updated_at 175 175 FROM oauth_sessions ··· 199 199 } 200 200 201 201 // getUISessionsForExport retrieves sanitized UI session records 202 - func getUISessionsForExport(db *sql.DB, did string) ([]UISessionExport, error) { 202 + func getUISessionsForExport(db DBTX, did string) ([]UISessionExport, error) { 203 203 rows, err := db.Query(` 204 204 SELECT id, expires_at, created_at 205 205 FROM ui_sessions ··· 229 229 } 230 230 231 231 // getHoldMembershipsForExport retrieves hold approval and denial records 232 - func getHoldMembershipsForExport(db *sql.DB, did string) (HoldMembershipsExport, error) { 232 + func getHoldMembershipsForExport(db DBTX, did string) (HoldMembershipsExport, error) { 233 233 memberships := HoldMembershipsExport{ 234 234 Approvals: []HoldApprovalExport{}, 235 235 Denials: []HoldDenialExport{},
+9 -9
pkg/appview/db/hold_store.go
··· 30 30 31 31 // GetCaptainRecord retrieves a captain record from the cache 32 32 // Returns nil if not found (cache miss) 33 - func GetCaptainRecord(db *sql.DB, holdDID string) (*HoldCaptainRecord, error) { 33 + func GetCaptainRecord(db DBTX, holdDID string) (*HoldCaptainRecord, error) { 34 34 query := ` 35 35 SELECT hold_did, owner_did, public, allow_all_crew, 36 36 deployed_at, region, updated_at ··· 71 71 } 72 72 73 73 // UpsertCaptainRecord inserts or updates a captain record in the cache 74 - func UpsertCaptainRecord(db *sql.DB, record *HoldCaptainRecord) error { 74 + func UpsertCaptainRecord(db DBTX, record *HoldCaptainRecord) error { 75 75 query := ` 76 76 INSERT INTO hold_captain_records ( 77 77 hold_did, owner_did, public, allow_all_crew, ··· 104 104 } 105 105 106 106 // ListHoldDIDs returns all known hold DIDs from the cache 107 - func ListHoldDIDs(db *sql.DB) ([]string, error) { 107 + func ListHoldDIDs(db DBTX) ([]string, error) { 108 108 query := ` 109 109 SELECT hold_did 110 110 FROM hold_captain_records ··· 143 143 144 144 // GetCaptainRecordsForOwner retrieves all captain records where the user is the owner 145 145 // Used for GDPR export to find all holds owned by a user 146 - func GetCaptainRecordsForOwner(db *sql.DB, ownerDID string) ([]*HoldCaptainRecord, error) { 146 + func GetCaptainRecordsForOwner(db DBTX, ownerDID string) ([]*HoldCaptainRecord, error) { 147 147 query := ` 148 148 SELECT hold_did, owner_did, public, allow_all_crew, 149 149 deployed_at, region, updated_at ··· 198 198 } 199 199 200 200 // DeleteCaptainRecord removes a captain record from the cache 201 - func DeleteCaptainRecord(db *sql.DB, holdDID string) error { 201 + func DeleteCaptainRecord(db DBTX, holdDID string) error { 202 202 // Note: hold_crew_members doesn't have CASCADE, so delete crew first 203 203 _, err := db.Exec(`DELETE FROM hold_crew_members WHERE hold_did = ?`, holdDID) 204 204 if err != nil { ··· 226 226 } 227 227 228 228 // UpsertCrewMember inserts or updates a crew member record 229 - func UpsertCrewMember(db *sql.DB, member *CrewMember) error { 229 + func UpsertCrewMember(db DBTX, member *CrewMember) error { 230 230 query := ` 231 231 INSERT INTO hold_crew_members ( 232 232 hold_did, member_did, rkey, role, permissions, tier, added_at, updated_at ··· 257 257 } 258 258 259 259 // DeleteCrewMemberByRkey removes a crew member by rkey (for delete events from Jetstream) 260 - func DeleteCrewMemberByRkey(db *sql.DB, holdDID, rkey string) error { 260 + func DeleteCrewMemberByRkey(db DBTX, holdDID, rkey string) error { 261 261 _, err := db.Exec(`DELETE FROM hold_crew_members WHERE hold_did = ? AND rkey = ?`, holdDID, rkey) 262 262 if err != nil { 263 263 return fmt.Errorf("failed to delete crew member by rkey: %w", err) ··· 278 278 279 279 // GetAvailableHolds returns all holds available to a user, grouped by membership type 280 280 // Results are ordered: owner first, then crew, then eligible, then public 281 - func GetAvailableHolds(db *sql.DB, userDID string) ([]AvailableHold, error) { 281 + func GetAvailableHolds(db DBTX, userDID string) ([]AvailableHold, error) { 282 282 query := ` 283 283 SELECT 284 284 h.hold_did, ··· 352 352 } 353 353 354 354 // GetCrewMemberships returns all holds where a user is a crew member 355 - func GetCrewMemberships(db *sql.DB, memberDID string) ([]CrewMember, error) { 355 + func GetCrewMemberships(db DBTX, memberDID string) ([]CrewMember, error) { 356 356 query := ` 357 357 SELECT hold_did, member_did, rkey, role, permissions, tier, added_at, created_at, updated_at 358 358 FROM hold_crew_members
+60 -67
pkg/appview/db/queries.go
··· 55 55 56 56 // SearchRepositories searches for repositories matching the query across handles, DIDs, repositories, and annotations 57 57 // Returns RepoCardData (one per repository) instead of individual pushes/tags 58 - func SearchRepositories(db *sql.DB, query string, limit, offset int, currentUserDID string) ([]RepoCardData, int, error) { 58 + func SearchRepositories(db DBTX, query string, limit, offset int, currentUserDID string) ([]RepoCardData, int, error) { 59 59 // Escape LIKE wildcards so they're treated literally 60 60 query = escapeLikePattern(query) 61 61 ··· 181 181 } 182 182 183 183 // GetUserRepositories fetches all repositories for a user 184 - func GetUserRepositories(db *sql.DB, did string) ([]Repository, error) { 184 + func GetUserRepositories(db DBTX, did string) ([]Repository, error) { 185 185 // Get repository summary 186 186 rows, err := db.Query(` 187 187 SELECT ··· 310 310 311 311 // GetRepositoryMetadata retrieves metadata for a repository from annotations table 312 312 // Returns a map of annotation key -> value for easy access in templates and handlers 313 - func GetRepositoryMetadata(db *sql.DB, did string, repository string) (map[string]string, error) { 313 + func GetRepositoryMetadata(db DBTX, did string, repository string) (map[string]string, error) { 314 314 return GetRepositoryAnnotations(db, did, repository) 315 315 } 316 316 317 317 // GetUserByDID retrieves a user by DID 318 - func GetUserByDID(db *sql.DB, did string) (*User, error) { 318 + func GetUserByDID(db DBTX, did string) (*User, error) { 319 319 var user User 320 320 var avatar sql.NullString 321 321 err := db.QueryRow(` ··· 340 340 } 341 341 342 342 // GetUserByHandle retrieves a user by handle 343 - func GetUserByHandle(db *sql.DB, handle string) (*User, error) { 343 + func GetUserByHandle(db DBTX, handle string) (*User, error) { 344 344 var user User 345 345 var avatar sql.NullString 346 346 err := db.QueryRow(` ··· 365 365 } 366 366 367 367 // UpsertUser inserts or updates a user record 368 - func UpsertUser(db *sql.DB, user *User) error { 368 + func UpsertUser(db DBTX, user *User) error { 369 369 _, err := db.Exec(` 370 370 INSERT INTO users (did, handle, pds_endpoint, avatar, last_seen) 371 371 VALUES (?, ?, ?, ?, ?) ··· 380 380 381 381 // UpsertUserIgnoreAvatar inserts or updates a user record, but preserves existing avatar on update 382 382 // This is useful when avatar fetch fails, and we don't want to overwrite an existing avatar with empty string 383 - func UpsertUserIgnoreAvatar(db *sql.DB, user *User) error { 383 + func UpsertUserIgnoreAvatar(db DBTX, user *User) error { 384 384 _, err := db.Exec(` 385 385 INSERT INTO users (did, handle, pds_endpoint, avatar, last_seen) 386 386 VALUES (?, ?, ?, ?, ?) ··· 394 394 395 395 // UpdateUserLastSeen updates only the last_seen timestamp for a user 396 396 // This is more efficient than UpsertUser when only updating activity timestamp 397 - func UpdateUserLastSeen(db *sql.DB, did string) error { 397 + func UpdateUserLastSeen(db DBTX, did string) error { 398 398 _, err := db.Exec(` 399 399 UPDATE users SET last_seen = ? WHERE did = ? 400 400 `, time.Now(), did) ··· 403 403 404 404 // UpdateUserHandle updates a user's handle when an identity change event is received 405 405 // This is called when Jetstream receives an identity event indicating a handle change 406 - func UpdateUserHandle(db *sql.DB, did string, newHandle string) error { 406 + func UpdateUserHandle(db DBTX, did string, newHandle string) error { 407 407 _, err := db.Exec(` 408 408 UPDATE users SET handle = ?, last_seen = ? WHERE did = ? 409 409 `, newHandle, time.Now(), did) ··· 412 412 413 413 // UpdateUserAvatar updates a user's avatar URL when a profile change is detected 414 414 // This is called when Jetstream receives an app.bsky.actor.profile update 415 - func UpdateUserAvatar(db *sql.DB, did string, avatarURL string) error { 415 + func UpdateUserAvatar(db DBTX, did string, avatarURL string) error { 416 416 _, err := db.Exec(` 417 417 UPDATE users SET avatar = ?, last_seen = ? WHERE did = ? 418 418 `, avatarURL, time.Now(), did) ··· 420 420 } 421 421 422 422 // GetManifestDigestsForDID returns all manifest digests for a DID 423 - func GetManifestDigestsForDID(db *sql.DB, did string) ([]string, error) { 423 + func GetManifestDigestsForDID(db DBTX, did string) ([]string, error) { 424 424 rows, err := db.Query(` 425 425 SELECT digest FROM manifests WHERE did = ? 426 426 `, did) ··· 442 442 } 443 443 444 444 // DeleteManifestsNotInList deletes all manifests for a DID that are not in the provided list 445 - func DeleteManifestsNotInList(db *sql.DB, did string, keepDigests []string) error { 445 + func DeleteManifestsNotInList(db DBTX, did string, keepDigests []string) error { 446 446 if len(keepDigests) == 0 { 447 447 // No manifests to keep - delete all for this DID 448 448 _, err := db.Exec(`DELETE FROM manifests WHERE did = ?`, did) ··· 467 467 } 468 468 469 469 // GetTagsForDID returns all (repository, tag) pairs for a DID 470 - func GetTagsForDID(db *sql.DB, did string) ([]struct{ Repository, Tag string }, error) { 470 + func GetTagsForDID(db DBTX, did string) ([]struct{ Repository, Tag string }, error) { 471 471 rows, err := db.Query(` 472 472 SELECT repository, tag FROM tags WHERE did = ? 473 473 `, did) ··· 488 488 return tags, rows.Err() 489 489 } 490 490 491 - // DeleteTagsNotInList deletes all tags for a DID that are not in the provided list 492 - func DeleteTagsNotInList(db *sql.DB, did string, keepTags []struct{ Repository, Tag string }) error { 491 + // DeleteTagsNotInList deletes all tags for a DID that are not in the provided list. 492 + // Atomicity is provided by the caller's transaction when used during backfill. 493 + func DeleteTagsNotInList(db DBTX, did string, keepTags []struct{ Repository, Tag string }) error { 493 494 if len(keepTags) == 0 { 494 495 // No tags to keep - delete all for this DID 495 496 _, err := db.Exec(`DELETE FROM tags WHERE did = ?`, did) 496 497 return err 497 498 } 498 499 499 - // For tags, we need to check (repository, tag) pairs 500 - // Build a DELETE query that excludes the pairs we want to keep 501 - tx, err := db.Begin() 502 - if err != nil { 503 - return err 504 - } 505 - defer tx.Rollback() 506 - 507 500 // First, get all current tags 508 - rows, err := tx.Query(`SELECT id, repository, tag FROM tags WHERE did = ?`, did) 501 + rows, err := db.Query(`SELECT id, repository, tag FROM tags WHERE did = ?`, did) 509 502 if err != nil { 510 503 return err 511 504 } ··· 536 529 537 530 // Delete tags not in keep list 538 531 for _, id := range toDelete { 539 - if _, err := tx.Exec(`DELETE FROM tags WHERE id = ?`, id); err != nil { 532 + if _, err := db.Exec(`DELETE FROM tags WHERE id = ?`, id); err != nil { 540 533 return err 541 534 } 542 535 } 543 536 544 - return tx.Commit() 537 + return nil 545 538 } 546 539 547 540 // InsertManifest inserts or updates a manifest record 548 541 // Uses UPSERT to update core metadata if manifest already exists 549 542 // Returns the manifest ID (works correctly for both insert and update) 550 543 // Note: Annotations are stored separately in repository_annotations table 551 - func InsertManifest(db *sql.DB, manifest *Manifest) (int64, error) { 544 + func InsertManifest(db DBTX, manifest *Manifest) (int64, error) { 552 545 _, err := db.Exec(` 553 546 INSERT INTO manifests 554 547 (did, repository, digest, hold_endpoint, schema_version, media_type, ··· 584 577 } 585 578 586 579 // InsertLayer inserts a new layer record 587 - func InsertLayer(db *sql.DB, layer *Layer) error { 580 + func InsertLayer(db DBTX, layer *Layer) error { 588 581 _, err := db.Exec(` 589 582 INSERT INTO layers (manifest_id, digest, size, media_type, layer_index) 590 583 VALUES (?, ?, ?, ?, ?) ··· 593 586 } 594 587 595 588 // UpsertTag inserts or updates a tag record 596 - func UpsertTag(db *sql.DB, tag *Tag) error { 589 + func UpsertTag(db DBTX, tag *Tag) error { 597 590 _, err := db.Exec(` 598 591 INSERT INTO tags (did, repository, tag, digest, created_at) 599 592 VALUES (?, ?, ?, ?, ?) ··· 605 598 } 606 599 607 600 // DeleteTag deletes a tag record 608 - func DeleteTag(db *sql.DB, did, repository, tag string) error { 601 + func DeleteTag(db DBTX, did, repository, tag string) error { 609 602 _, err := db.Exec(` 610 603 DELETE FROM tags WHERE did = ? AND repository = ? AND tag = ? 611 604 `, did, repository, tag) ··· 616 609 // Only multi-arch tags (manifest lists) have platform info in manifest_references 617 610 // Single-arch tags will have empty Platforms slice (platform is obvious for single-arch) 618 611 // Attestation references (unknown/unknown platforms) are filtered out but tracked via HasAttestations 619 - func GetTagsWithPlatforms(db *sql.DB, did, repository string) ([]TagWithPlatforms, error) { 612 + func GetTagsWithPlatforms(db DBTX, did, repository string) ([]TagWithPlatforms, error) { 620 613 rows, err := db.Query(` 621 614 SELECT 622 615 t.id, ··· 700 693 701 694 // DeleteManifest deletes a manifest and its associated layers 702 695 // If repository is empty, deletes all manifests matching did and digest 703 - func DeleteManifest(db *sql.DB, did, repository, digest string) error { 696 + func DeleteManifest(db DBTX, did, repository, digest string) error { 704 697 var err error 705 698 if repository == "" { 706 699 // Delete by DID + digest only (used when repository is unknown, e.g., Jetstream DELETE events) ··· 718 711 // 719 712 // Due to ON DELETE CASCADE in the schema, deleting from users will automatically 720 713 // cascade to: manifests, tags, layers, references, annotations, stars, repo_pages, etc. 721 - func DeleteUserData(db *sql.DB, did string) error { 714 + func DeleteUserData(db DBTX, did string) error { 722 715 result, err := db.Exec(`DELETE FROM users WHERE did = ?`, did) 723 716 if err != nil { 724 717 return fmt.Errorf("failed to delete user: %w", err) ··· 735 728 736 729 // GetManifest fetches a single manifest by digest 737 730 // Note: Annotations are stored separately in repository_annotations table 738 - func GetManifest(db *sql.DB, digest string) (*Manifest, error) { 731 + func GetManifest(db DBTX, digest string) (*Manifest, error) { 739 732 var m Manifest 740 733 741 734 err := db.QueryRow(` ··· 756 749 757 750 // GetNewestManifestForRepo returns the newest manifest for a specific repository 758 751 // Used by backfill to ensure annotations come from the most recent manifest 759 - func GetNewestManifestForRepo(db *sql.DB, did, repository string) (*Manifest, error) { 752 + func GetNewestManifestForRepo(db DBTX, did, repository string) (*Manifest, error) { 760 753 var m Manifest 761 754 err := db.QueryRow(` 762 755 SELECT id, did, repository, digest, hold_endpoint, schema_version, media_type, ··· 779 772 // GetLatestHoldDIDForRepo returns the hold DID from the most recent manifest for a repository 780 773 // Returns empty string if no manifests exist (e.g., first push) 781 774 // This is used instead of the in-memory cache to determine which hold to use for blob operations 782 - func GetLatestHoldDIDForRepo(db *sql.DB, did, repository string) (string, error) { 775 + func GetLatestHoldDIDForRepo(db DBTX, did, repository string) (string, error) { 783 776 var holdDID string 784 777 err := db.QueryRow(` 785 778 SELECT hold_endpoint ··· 802 795 803 796 // GetRepositoriesForDID returns all unique repository names for a DID 804 797 // Used by backfill to reconcile annotations for all repositories 805 - func GetRepositoriesForDID(db *sql.DB, did string) ([]string, error) { 798 + func GetRepositoriesForDID(db DBTX, did string) ([]string, error) { 806 799 rows, err := db.Query(` 807 800 SELECT DISTINCT repository 808 801 FROM manifests ··· 825 818 } 826 819 827 820 // GetLayersForManifest fetches all layers for a manifest 828 - func GetLayersForManifest(db *sql.DB, manifestID int64) ([]Layer, error) { 821 + func GetLayersForManifest(db DBTX, manifestID int64) ([]Layer, error) { 829 822 rows, err := db.Query(` 830 823 SELECT manifest_id, digest, size, media_type, layer_index 831 824 FROM layers ··· 851 844 } 852 845 853 846 // InsertManifestReference inserts a new manifest reference record (for manifest lists/indexes) 854 - func InsertManifestReference(db *sql.DB, ref *ManifestReference) error { 847 + func InsertManifestReference(db DBTX, ref *ManifestReference) error { 855 848 _, err := db.Exec(` 856 849 INSERT INTO manifest_references (manifest_id, digest, size, media_type, 857 850 platform_architecture, platform_os, ··· 866 859 } 867 860 868 861 // GetManifestReferencesForManifest fetches all manifest references for a manifest list/index 869 - func GetManifestReferencesForManifest(db *sql.DB, manifestID int64) ([]ManifestReference, error) { 862 + func GetManifestReferencesForManifest(db DBTX, manifestID int64) ([]ManifestReference, error) { 870 863 rows, err := db.Query(` 871 864 SELECT manifest_id, digest, size, media_type, 872 865 platform_architecture, platform_os, platform_variant, platform_os_version, ··· 914 907 // GetTopLevelManifests returns only manifest lists and orphaned single-arch manifests 915 908 // Filters out platform-specific manifests that are referenced by manifest lists 916 909 // Note: Annotations are stored separately in repository_annotations table - use GetRepositoryMetadata to fetch them 917 - func GetTopLevelManifests(db *sql.DB, did, repository string, limit, offset int) ([]ManifestWithMetadata, error) { 910 + func GetTopLevelManifests(db DBTX, did, repository string, limit, offset int) ([]ManifestWithMetadata, error) { 918 911 rows, err := db.Query(` 919 912 WITH manifest_list_children AS ( 920 913 -- Get all digests that are children of manifest lists ··· 1047 1040 1048 1041 // GetManifestDetail returns a manifest with full platform details and tags 1049 1042 // Note: Annotations are stored separately in repository_annotations table - use GetRepositoryMetadata to fetch them 1050 - func GetManifestDetail(db *sql.DB, did, repository, digest string) (*ManifestWithMetadata, error) { 1043 + func GetManifestDetail(db DBTX, did, repository, digest string) (*ManifestWithMetadata, error) { 1051 1044 // First, get the manifest and its tags 1052 1045 var m ManifestWithMetadata 1053 1046 var tags, configDigest sql.NullString ··· 1152 1145 } 1153 1146 1154 1147 // GetFirehoseCursor retrieves the current firehose cursor 1155 - func GetFirehoseCursor(db *sql.DB) (int64, error) { 1148 + func GetFirehoseCursor(db DBTX) (int64, error) { 1156 1149 var cursor int64 1157 1150 err := db.QueryRow("SELECT cursor FROM firehose_cursor WHERE id = 1").Scan(&cursor) 1158 1151 if err == sql.ErrNoRows { ··· 1162 1155 } 1163 1156 1164 1157 // UpdateFirehoseCursor updates the firehose cursor 1165 - func UpdateFirehoseCursor(db *sql.DB, cursor int64) error { 1158 + func UpdateFirehoseCursor(db DBTX, cursor int64) error { 1166 1159 _, err := db.Exec(` 1167 1160 INSERT INTO firehose_cursor (id, cursor, updated_at) 1168 1161 VALUES (1, ?, datetime('now')) ··· 1174 1167 } 1175 1168 1176 1169 // IsManifestTagged checks if a manifest has any tags 1177 - func IsManifestTagged(db *sql.DB, did, repository, digest string) (bool, error) { 1170 + func IsManifestTagged(db DBTX, did, repository, digest string) (bool, error) { 1178 1171 var count int 1179 1172 err := db.QueryRow(` 1180 1173 SELECT COUNT(*) FROM tags ··· 1189 1182 } 1190 1183 1191 1184 // GetManifestTags retrieves all tags for a manifest 1192 - func GetManifestTags(db *sql.DB, did, repository, digest string) ([]string, error) { 1185 + func GetManifestTags(db DBTX, did, repository, digest string) ([]string, error) { 1193 1186 rows, err := db.Query(` 1194 1187 SELECT tag FROM tags 1195 1188 WHERE did = ? AND repository = ? AND digest = ? ··· 1225 1218 } 1226 1219 1227 1220 // GetBackfillState retrieves the backfill state 1228 - func GetBackfillState(db *sql.DB) (*BackfillState, error) { 1221 + func GetBackfillState(db DBTX) (*BackfillState, error) { 1229 1222 var state BackfillState 1230 1223 var updatedAtStr string 1231 1224 ··· 1263 1256 } 1264 1257 1265 1258 // UpsertBackfillState updates or creates backfill state 1266 - func UpsertBackfillState(db *sql.DB, state *BackfillState) error { 1259 + func UpsertBackfillState(db DBTX, state *BackfillState) error { 1267 1260 _, err := db.Exec(` 1268 1261 INSERT INTO backfill_state (id, start_cursor, current_cursor, completed, updated_at) 1269 1262 VALUES (1, ?, ?, ?, datetime('now')) ··· 1277 1270 } 1278 1271 1279 1272 // UpdateBackfillCursor updates just the current cursor position 1280 - func UpdateBackfillCursor(db *sql.DB, cursor int64) error { 1273 + func UpdateBackfillCursor(db DBTX, cursor int64) error { 1281 1274 _, err := db.Exec(` 1282 1275 UPDATE backfill_state 1283 1276 SET current_cursor = ?, updated_at = datetime('now') ··· 1287 1280 } 1288 1281 1289 1282 // MarkBackfillCompleted marks the backfill as completed 1290 - func MarkBackfillCompleted(db *sql.DB) error { 1283 + func MarkBackfillCompleted(db DBTX) error { 1291 1284 _, err := db.Exec(` 1292 1285 UPDATE backfill_state 1293 1286 SET completed = 1, updated_at = datetime('now') ··· 1297 1290 } 1298 1291 1299 1292 // GetRepository fetches a specific repository for a user 1300 - func GetRepository(db *sql.DB, did, repository string) (*Repository, error) { 1293 + func GetRepository(db DBTX, did, repository string) (*Repository, error) { 1301 1294 // Get repository summary 1302 1295 var r Repository 1303 1296 r.Name = repository ··· 1412 1405 } 1413 1406 1414 1407 // GetRepositoryStats fetches stats for a repository 1415 - func GetRepositoryStats(db *sql.DB, did, repository string) (*RepositoryStats, error) { 1408 + func GetRepositoryStats(db DBTX, did, repository string) (*RepositoryStats, error) { 1416 1409 var stats RepositoryStats 1417 1410 var lastPullStr, lastPushStr sql.NullString 1418 1411 ··· 1463 1456 1464 1457 // UpsertRepositoryStats inserts or updates repository stats 1465 1458 // Note: star_count is calculated dynamically from the stars table, not stored here 1466 - func UpsertRepositoryStats(db *sql.DB, stats *RepositoryStats) error { 1459 + func UpsertRepositoryStats(db DBTX, stats *RepositoryStats) error { 1467 1460 _, err := db.Exec(` 1468 1461 INSERT INTO repository_stats (did, repository, pull_count, last_pull, push_count, last_push) 1469 1462 VALUES (?, ?, ?, ?, ?, ?) ··· 1477 1470 } 1478 1471 1479 1472 // UpsertStar inserts or updates a star record (idempotent) 1480 - func UpsertStar(db *sql.DB, starrerDID, ownerDID, repository string, createdAt time.Time) error { 1473 + func UpsertStar(db DBTX, starrerDID, ownerDID, repository string, createdAt time.Time) error { 1481 1474 _, err := db.Exec(` 1482 1475 INSERT INTO stars (starrer_did, owner_did, repository, created_at) 1483 1476 VALUES (?, ?, ?, ?) ··· 1488 1481 } 1489 1482 1490 1483 // DeleteStar deletes a star record 1491 - func DeleteStar(db *sql.DB, starrerDID, ownerDID, repository string) error { 1484 + func DeleteStar(db DBTX, starrerDID, ownerDID, repository string) error { 1492 1485 _, err := db.Exec(` 1493 1486 DELETE FROM stars 1494 1487 WHERE starrer_did = ? AND owner_did = ? AND repository = ? ··· 1497 1490 } 1498 1491 1499 1492 // RebuildStarCount rebuilds the star count for a specific repository from the stars table 1500 - func RebuildStarCount(db *sql.DB, ownerDID, repository string) error { 1493 + func RebuildStarCount(db DBTX, ownerDID, repository string) error { 1501 1494 _, err := db.Exec(` 1502 1495 INSERT INTO repository_stats (did, repository, star_count) 1503 1496 VALUES (?, ?, ( ··· 1515 1508 1516 1509 // GetStarsForDID returns all stars created by a specific DID (for backfill reconciliation) 1517 1510 // Returns a map of (ownerDID, repository) -> createdAt 1518 - func GetStarsForDID(db *sql.DB, starrerDID string) (map[string]time.Time, error) { 1511 + func GetStarsForDID(db DBTX, starrerDID string) (map[string]time.Time, error) { 1519 1512 rows, err := db.Query(` 1520 1513 SELECT owner_did, repository, created_at 1521 1514 FROM stars ··· 1542 1535 1543 1536 // CleanupOrphanedTags removes tags whose manifest digest no longer exists 1544 1537 // This handles cases where manifests were deleted but tags pointing to them remain 1545 - func CleanupOrphanedTags(db *sql.DB, did string) error { 1538 + func CleanupOrphanedTags(db DBTX, did string) error { 1546 1539 _, err := db.Exec(` 1547 1540 DELETE FROM tags 1548 1541 WHERE did = ? ··· 1557 1550 1558 1551 // DeleteStarsNotInList deletes stars from the database that are not in the provided list 1559 1552 // This is used during backfill reconciliation to remove stars that no longer exist on PDS 1560 - func DeleteStarsNotInList(db *sql.DB, starrerDID string, foundStars map[string]time.Time) error { 1553 + func DeleteStarsNotInList(db DBTX, starrerDID string, foundStars map[string]time.Time) error { 1561 1554 // Get current stars in DB 1562 1555 currentStars, err := GetStarsForDID(db, starrerDID) 1563 1556 if err != nil { ··· 1608 1601 // HoldDIDDB wraps a sql.DB and implements the HoldDIDLookup interface for middleware 1609 1602 // This is a minimal wrapper that only provides hold DID lookups for blob routing 1610 1603 type HoldDIDDB struct { 1611 - db *sql.DB 1604 + db DBTX 1612 1605 } 1613 1606 1614 1607 // NewHoldDIDDB creates a new hold DID database wrapper 1615 - func NewHoldDIDDB(db *sql.DB) *HoldDIDDB { 1608 + func NewHoldDIDDB(db DBTX) *HoldDIDDB { 1616 1609 return &HoldDIDDB{db: db} 1617 1610 } 1618 1611 ··· 1632 1625 ) 1633 1626 1634 1627 // GetRepoCards fetches repository cards with full data including Tag, Digest, and LastUpdated 1635 - func GetRepoCards(db *sql.DB, limit int, currentUserDID string, sortOrder RepoCardSortOrder) ([]RepoCardData, error) { 1628 + func GetRepoCards(db DBTX, limit int, currentUserDID string, sortOrder RepoCardSortOrder) ([]RepoCardData, error) { 1636 1629 // Build ORDER BY clause based on sort order 1637 1630 var orderBy string 1638 1631 switch sortOrder { ··· 1713 1706 } 1714 1707 1715 1708 // GetUserRepoCards fetches repository cards for a specific user with full data 1716 - func GetUserRepoCards(db *sql.DB, userDID string, currentUserDID string) ([]RepoCardData, error) { 1709 + func GetUserRepoCards(db DBTX, userDID string, currentUserDID string) ([]RepoCardData, error) { 1717 1710 query := ` 1718 1711 WITH latest_manifests AS ( 1719 1712 SELECT did, repository, MAX(id) as latest_id ··· 1795 1788 } 1796 1789 1797 1790 // UpsertRepoPage inserts or updates a repo page record 1798 - func UpsertRepoPage(db *sql.DB, did, repository, description, avatarCID string, createdAt, updatedAt time.Time) error { 1791 + func UpsertRepoPage(db DBTX, did, repository, description, avatarCID string, createdAt, updatedAt time.Time) error { 1799 1792 _, err := db.Exec(` 1800 1793 INSERT INTO repo_pages (did, repository, description, avatar_cid, created_at, updated_at) 1801 1794 VALUES (?, ?, ?, ?, ?, ?) ··· 1808 1801 } 1809 1802 1810 1803 // GetRepoPage retrieves a repo page record 1811 - func GetRepoPage(db *sql.DB, did, repository string) (*RepoPage, error) { 1804 + func GetRepoPage(db DBTX, did, repository string) (*RepoPage, error) { 1812 1805 var rp RepoPage 1813 1806 err := db.QueryRow(` 1814 1807 SELECT did, repository, description, avatar_cid, created_at, updated_at ··· 1822 1815 } 1823 1816 1824 1817 // DeleteRepoPage deletes a repo page record 1825 - func DeleteRepoPage(db *sql.DB, did, repository string) error { 1818 + func DeleteRepoPage(db DBTX, did, repository string) error { 1826 1819 _, err := db.Exec(` 1827 1820 DELETE FROM repo_pages WHERE did = ? AND repository = ? 1828 1821 `, did, repository) ··· 1830 1823 } 1831 1824 1832 1825 // GetRepoPagesByDID returns all repo pages for a DID 1833 - func GetRepoPagesByDID(db *sql.DB, did string) ([]RepoPage, error) { 1826 + func GetRepoPagesByDID(db DBTX, did string) ([]RepoPage, error) { 1834 1827 rows, err := db.Query(` 1835 1828 SELECT did, repository, description, avatar_cid, created_at, updated_at 1836 1829 FROM repo_pages
+29 -5
pkg/appview/jetstream/backfill.go
··· 181 181 return nil 182 182 } 183 183 184 - // backfillRepo backfills all records for a single repo/DID 184 + // backfillRepo backfills all records for a single repo/DID. 185 + // Per-record processing is wrapped in a single SQL transaction to batch writes 186 + // (one commit per repo instead of per-statement). 185 187 func (b *BackfillWorker) backfillRepo(ctx context.Context, did, collection string) (int, error) { 186 188 // Resolve DID to get user's PDS endpoint 187 189 pdsEndpoint, err := atproto.ResolveDIDToPDS(ctx, did) ··· 192 194 // Create a client for this user's PDS with the user's DID 193 195 // This allows GetRecord to work properly with the repo parameter 194 196 pdsClient := atproto.NewClient(pdsEndpoint, did, "") 197 + 198 + // Begin transaction for per-record processing (batches all writes into one commit) 199 + tx, err := b.db.Begin() 200 + if err != nil { 201 + return 0, fmt.Errorf("failed to begin transaction: %w", err) 202 + } 203 + defer tx.Rollback() 204 + 205 + // Create a transactional processor — all DB writes go through this tx 206 + txProcessor := NewProcessor(tx, false, b.processor.statsCache) 195 207 196 208 var recordCursor string 197 209 recordCount := 0 ··· 235 247 } 236 248 } 237 249 238 - if err := b.processRecord(ctx, did, collection, &record); err != nil { 250 + if err := b.processRecordWith(ctx, txProcessor, did, collection, &record); err != nil { 239 251 slog.Warn("Backfill failed to process record", "uri", record.URI, "error", err) 240 252 continue 241 253 } ··· 250 262 recordCursor = cursor 251 263 } 252 264 265 + // Commit all per-record writes in one batch 266 + if err := tx.Commit(); err != nil { 267 + return 0, fmt.Errorf("failed to commit transaction: %w", err) 268 + } 269 + 270 + // Reconciliation runs outside the transaction (involves network I/O and fewer writes) 271 + 253 272 // Reconcile deletions - remove records from DB that no longer exist on PDS 254 273 if err := b.reconcileDeletions(did, collection, foundManifestDigests, foundTags, foundStars); err != nil { 255 274 slog.Warn("Backfill failed to reconcile deletions", "did", did, "error", err) ··· 334 353 return nil 335 354 } 336 355 337 - // processRecord processes a single record using the unified ProcessRecord method. 338 - // This ensures consistent handling (validation, user creation) between Worker and Backfill. 356 + // processRecord processes a single record using the default processor. 339 357 func (b *BackfillWorker) processRecord(ctx context.Context, did, collection string, record *atproto.Record) error { 358 + return b.processRecordWith(ctx, b.processor, did, collection, record) 359 + } 360 + 361 + // processRecordWith processes a single record using the given processor. 362 + // This allows backfillRepo to use a transactional processor while other callers use the default. 363 + func (b *BackfillWorker) processRecordWith(ctx context.Context, proc *Processor, did, collection string, record *atproto.Record) error { 340 364 rkey := extractRkeyFromURI(record.URI) 341 365 342 366 // For sailor profile collection, we need to pass the queryCaptainFn ··· 346 370 queryCaptainFn = b.queryCaptainRecordWrapper 347 371 } 348 372 349 - return b.processor.ProcessRecord(ctx, did, collection, rkey, record.Value, false, queryCaptainFn) 373 + return proc.ProcessRecord(ctx, did, collection, rkey, record.Value, false, queryCaptainFn) 350 374 } 351 375 352 376 // queryCaptainRecordWrapper wraps queryCaptainRecord with backfill-specific logic
+2 -3
pkg/appview/jetstream/processor.go
··· 2 2 3 3 import ( 4 4 "context" 5 - "database/sql" 6 5 "encoding/json" 7 6 "fmt" 8 7 "log/slog" ··· 18 17 // Processor handles shared database operations for both Worker (live) and Backfill (sync) 19 18 // This eliminates code duplication between the two data ingestion paths 20 19 type Processor struct { 21 - db *sql.DB 20 + db db.DBTX 22 21 userCache *UserCache // Optional - enabled for Worker, disabled for Backfill 23 22 statsCache *StatsCache // In-memory cache for per-hold stats aggregation 24 23 useCache bool ··· 28 27 // NewProcessor creates a new shared processor 29 28 // useCache: true for Worker (live streaming), false for Backfill (batch processing) 30 29 // statsCache: shared stats cache for aggregating across holds (nil to skip stats processing) 31 - func NewProcessor(database *sql.DB, useCache bool, statsCache *StatsCache) *Processor { 30 + func NewProcessor(database db.DBTX, useCache bool, statsCache *StatsCache) *Processor { 32 31 // Create lexicon catalog for debug validation logging 33 32 dir := identity.DefaultDirectory() 34 33 catalog := lexicon.NewResolvingCatalog()