  # Create a wildcard crew record allowing any authenticated user to join.
  allow_all_crew: false
  # URL to fetch avatar image from during bootstrap.
-  profile_avatar_url: https://imgs.blue/evan.jarrett.net/1TpTOdtS60GdJWBYEqtK22y688jajbQ9a5kbYRFtwuqrkBAE
+  profile_avatar_url: https://atcr.io/web-app-manifest-192x192.png
  # Post to Bluesky when users push images. Synced to captain record on startup.
  enable_bluesky_posts: false
  # Deployment region, auto-detected from cloud metadata or S3 config.
···
admin:
  # Enable the web-based admin panel for crew and storage management.
  enabled: true
+# Garbage collection settings.
+gc:
+  # Enable nightly garbage collection of orphaned blobs and records.
+  enabled: false
# Storage quota tiers. Empty disables quota enforcement.
quota:
  # Quota tiers keyed by rank name. Each tier has a human-readable quota limit.

docs/CONFIG_BLOB_STORAGE.md (new file, +49)

# Config Blob Storage Decision

## Background

OCI image manifests reference two types of blobs:

1. **Layers** — filesystem diffs (tar+gzip), typically large, content-addressed and shared across users
2. **Config blob** — small JSON (~2-15KB) containing image metadata: architecture, OS, environment variables, entrypoint, Dockerfile build history, and labels
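
For concreteness, the sketch below shows the kind of data a config blob carries, as a Go struct whose tags mirror the OCI image-spec config schema. It is abridged; real configs also include fields such as `rootfs.diff_ids` and `created`.

```go
// Abridged view of the OCI image config JSON ("config blob"). Field names
// follow the OCI image-spec; only the parts relevant to this document are shown.
type ociImageConfig struct {
	Architecture string `json:"architecture"`
	OS           string `json:"os"`
	Config       struct {
		Env        []string          `json:"Env"`        // e.g. "DATABASE_URL=..."
		Entrypoint []string          `json:"Entrypoint"` // command the image runs
		Labels     map[string]string `json:"Labels"`     // arbitrary key/value metadata
	} `json:"config"`
	History []struct {
		CreatedBy  string `json:"created_by"` // the Dockerfile instruction for this step
		EmptyLayer bool   `json:"empty_layer,omitempty"`
	} `json:"history"`
}
```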

In ATCR, manifests are stored in the user's PDS while all blobs (layers and config) are stored in S3 via the hold service. The hold tracks layers with `io.atcr.hold.layer` records but has no equivalent tracking for config blobs.

## Considered: Storing Config Blobs in PDS

Config blobs are unique per image build — unlike layers, which are deduplicated across users, a config blob contains the specific Dockerfile history, env vars, and labels for that build. This makes them conceptually "user data" that could belong in the user's PDS alongside the manifest.

The proposal was to add a `ConfigBlob` field to `ManifestRecord`, uploading the config blob to PDS during push (the data is already fetched from S3 for label extraction). The config would also remain in S3, since the distribution library puts it there during the blob push phase.
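
For illustration, the considered change would have looked roughly like the sketch below. `ManifestRecord` is the existing type named above, but the new field and its encoding are hypothetical, since the proposal was not adopted.

```go
// Hypothetical sketch of the rejected proposal; this is not the actual
// ManifestRecord lexicon, and the new field is illustrative only.
type ManifestRecord struct {
	// ...existing fields: manifest JSON, tag, digest, annotations...

	// ConfigBlob would have carried the raw OCI config JSON (or a PDS blob
	// reference to it) alongside the manifest. Rejected because it would make
	// env vars, build args, and Dockerfile history publicly readable.
	ConfigBlob []byte `json:"configBlob,omitempty"`
}
```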

Potential benefits:

- Manifests become more self-contained in PDS
- Config metadata (entrypoint, env, history) available without S3 access (e.g., for web UI)
- Aligns with the principle that user-specific data belongs in the user's PDS

## Decision: Keep Config Blobs in S3 Only

Config blobs can contain sensitive data:

- **Environment variables** — `ENV DATABASE_URL=...`, `ENV API_KEY=...` set in Dockerfiles
- **Build history** — `history[].created_by` reveals exact Dockerfile commands, internal registry URLs, build arguments
- **Labels** — may contain internal metadata not intended for public consumption

ATProto has no private data; anything written to a PDS repository is publicly readable. The current storage split creates a useful privacy boundary:

| Storage | Visibility      | Contains                                           |
|---------|-----------------|----------------------------------------------------|
| PDS     | Public (anyone) | Manifest structure, tags, repo names, annotations  |
| Hold/S3 | Auth-gated      | Layers + config — actual image content             |

This boundary enables **semi-private repos**: the public PDS metadata tells you what images exist (names, tags, sizes), but you cannot reconstruct or run the image without hold access. Storing config in PDS would break this — build secrets and Dockerfile history would be publicly readable even when the hold restricts blob access.

We considered making PDS storage optional (only for fully public holds or allow-all-crew holds), but an optional field that can't be relied upon adds complexity without clear benefit; the config must live in S3 for the pull path regardless.

## Current Status

Config blobs remain in S3 behind hold authorization. GC accounts for config digests so they are not swept as orphans: config digests are included in the referenced set alongside layer digests.
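
A minimal sketch of that mark step, assuming manifests are decoded with the OCI image-spec Go types; the package and helper names are illustrative, not ATCR's actual GC code.

```go
package gc // illustrative package name

import ocispec "github.com/opencontainers/image-spec/specs-go/v1"

// referencedDigests builds the GC "referenced" set from the manifests that are
// still live: the config digest is recorded alongside every layer digest, so a
// config blob only becomes an orphan candidate once no manifest points at it.
func referencedDigests(manifests []ocispec.Manifest) map[string]struct{} {
	referenced := make(map[string]struct{}, len(manifests))
	for _, m := range manifests {
		referenced[m.Config.Digest.String()] = struct{}{}
		for _, layer := range m.Layers {
			referenced[layer.Digest.String()] = struct{}{}
		}
	}
	return referenced
}
```

Anything outside this set is a candidate for the nightly GC pass controlled by the new `gc` config section.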

## Revisit If

- ATProto adds private data support
- A concrete use case emerges that requires PDS-native config access

pkg/hold/pds/records.go

	return result, rows.Err()
}

+// DistinctDIDs returns all unique DIDs that have records in a collection.
+// Used by GC to discover all users who have pushed to this hold.
+func (ri *RecordsIndex) DistinctDIDs(collection string) ([]string, error) {
+	rows, err := ri.db.Query(`
+		SELECT DISTINCT did FROM records WHERE collection = ? AND did IS NOT NULL
+	`, collection)
+	if err != nil {
+		return nil, fmt.Errorf("failed to query distinct DIDs: %w", err)
+	}
+	defer rows.Close()
+
+	var dids []string
+	for rows.Next() {
+		var did string
+		if err := rows.Scan(&did); err != nil {
+			return nil, fmt.Errorf("failed to scan DID: %w", err)
+		}
+		dids = append(dids, did)
+	}
+	return dids, rows.Err()
+}
+
// QuotaResult holds aggregated quota data from SQL queries
type QuotaResult struct {
	UniqueBlobs int

pkg/hold/pds/records_test.go (+59)

		t.Errorf("Expected total size 100, got %d", totalSize)
	}
}
+
+// TestRecordsIndex_DistinctDIDs tests retrieving unique DIDs from a collection
+func TestRecordsIndex_DistinctDIDs(t *testing.T) {
+	tmpDir := t.TempDir()
+	ri, err := NewRecordsIndex(filepath.Join(tmpDir, "records.db"))
+	if err != nil {
+		t.Fatalf("NewRecordsIndex() error = %v", err)
+	}
+	defer ri.Close()
+
+	// Add layer records for multiple users (some with duplicate DIDs)
+	ri.IndexRecord("io.atcr.hold.layer", "r1", "cid1", "did:plc:alice", "sha256:aaa", 100)
+	ri.IndexRecord("io.atcr.hold.layer", "r2", "cid2", "did:plc:alice", "sha256:bbb", 200)
+	ri.IndexRecord("io.atcr.hold.layer", "r3", "cid3", "did:plc:bob", "sha256:ccc", 500)
+	// Non-layer record should be excluded
+	ri.IndexRecord("io.atcr.hold.crew", "r4", "cid4", "did:plc:charlie", "", 0)
+
+	dids, err := ri.DistinctDIDs("io.atcr.hold.layer")
+	if err != nil {
+		t.Fatalf("DistinctDIDs() error = %v", err)
+	}
+
+	if len(dids) != 2 {
+		t.Fatalf("Expected 2 distinct DIDs, got %d: %v", len(dids), dids)
+	}
+
+	didSet := make(map[string]bool)
+	for _, d := range dids {
+		didSet[d] = true
+	}
+
+	if !didSet["did:plc:alice"] {
+		t.Error("Expected did:plc:alice in results")
+	}
+	if !didSet["did:plc:bob"] {
+		t.Error("Expected did:plc:bob in results")
+	}
+	if didSet["did:plc:charlie"] {
+		t.Error("did:plc:charlie should not be in layer DIDs")
+	}
+}
+
+// TestRecordsIndex_DistinctDIDs_Empty tests DistinctDIDs with no records
+func TestRecordsIndex_DistinctDIDs_Empty(t *testing.T) {
+	tmpDir := t.TempDir()
+	ri, err := NewRecordsIndex(filepath.Join(tmpDir, "records.db"))
+	if err != nil {
+		t.Fatalf("NewRecordsIndex() error = %v", err)
+	}
+	defer ri.Close()
+
+	dids, err := ri.DistinctDIDs("io.atcr.hold.layer")
+	if err != nil {
+		t.Fatalf("DistinctDIDs() error = %v", err)
+	}
+	if len(dids) != 0 {
+		t.Errorf("Expected empty DIDs slice, got %d entries", len(dids))
+	}
+}