···2121# Production: Set to your public URL (e.g., https://atcr.io)
2222# ATCR_BASE_URL=http://127.0.0.1:5000
23232424-# Service name (used for JWT service/issuer fields)
2525-# Default: Derived from base URL hostname, or "atcr.io"
2626-# ATCR_SERVICE_NAME=atcr.io
2727-2824# ==============================================================================
2925# Storage Configuration
3026# ==============================================================================
···4844# Path to JWT signing certificate (auto-generated if missing)
4945# Default: /var/lib/atcr/auth/private-key.crt
5046# ATCR_AUTH_CERT_PATH=/var/lib/atcr/auth/private-key.crt
5151-5252-# JWT token expiration in seconds (default: 300 = 5 minutes)
5353-# ATCR_TOKEN_EXPIRATION=300
54475548# Path to OAuth client P-256 signing key (auto-generated on first run)
5649# Used for confidential OAuth client authentication (production only)
···130123# ATProto relay endpoint for backfill sync API
131124# Default: https://relay1.us-east.bsky.network
132125# ATCR_RELAY_ENDPOINT=https://relay1.us-east.bsky.network
133133-134134-# Backfill interval (default: 1h)
135135-# Examples: 30m, 1h, 2h, 24h
136136-# ATCR_BACKFILL_INTERVAL=1h
-13
.env.example
···4545# Production: Set to your public URL (e.g., https://atcr.io)
4646# ATCR_BASE_URL=https://atcr.io
47474848-# Service name for JWT issuer/service fields
4949-# Default: Derived from ATCR_BASE_URL hostname, or "atcr.io"
5050-# ATCR_SERVICE_NAME=atcr.io
5151-5248# ==============================================================================
5349# APPVIEW - STORAGE CONFIGURATION (REQUIRED)
5450# ==============================================================================
···7268# Default: /var/lib/atcr/auth/private-key.crt
7369# ATCR_AUTH_CERT_PATH=/var/lib/atcr/auth/private-key.crt
74707575-# JWT token expiration in seconds
7676-# Default: 300 (5 minutes)
7777-# ATCR_TOKEN_EXPIRATION=300
7878-7971# Path to OAuth client P-256 signing key (auto-generated for production)
8072# Used for confidential OAuth client authentication
8173# Localhost deployments always use public OAuth clients (no key needed)
···109101# ATProto relay endpoint for backfill sync API
110102# Default: https://relay1.us-east.bsky.network
111103# ATCR_RELAY_ENDPOINT=https://relay1.us-east.bsky.network
112112-113113-# Backfill sync interval
114114-# Default: 1h
115115-# Examples: 30m, 1h, 2h, 24h
116116-# ATCR_BACKFILL_INTERVAL=1h
117104118105# ==============================================================================
119106# APPVIEW - HEALTH CHECKS
+12
.env.hold.example
···151151# Basic auth credentials (optional)
152152# ATCR_LOG_SHIPPER_USERNAME=
153153# ATCR_LOG_SHIPPER_PASSWORD=
154154+155155+# ==============================================================================
156156+# Garbage Collection
157157+# ==============================================================================
158158+159159+# Enable garbage collection for orphaned blobs (default: true)
160160+# GC runs on startup and then nightly (every 24 hours)
161161+GC_ENABLED=true
162162+163163+# Dry-run mode: log what would be deleted without actually deleting (default: true)
164164+# Set to false after validating the GC logs show correct behavior
165165+GC_DRY_RUN=true
···11+// Package gc implements garbage collection for the hold service.
22+// It periodically cleans up orphaned blobs from S3 storage based on
33+// layer records in the hold's embedded PDS.
44+package gc
55+66+import (
77+ "os"
88+ "time"
99+)
// Fixed GC timing values. They are compile-time constants rather than
// settings so the configuration surface stays small.

// gcInterval is the delay between scheduled GC runs (24 hours).
const gcInterval = 24 * time.Hour

// gcGracePeriod is the minimum age a layer record must reach before GC will
// consider it; records created within the last 7 days are skipped.
const gcGracePeriod = 7 * 24 * time.Hour
// Config holds the GC settings read from environment variables.
type Config struct {
	// Enabled controls whether GC runs at all (GC_ENABLED, default: true).
	Enabled bool

	// DryRun makes GC log what it would delete without deleting anything
	// (GC_DRY_RUN, default: true). It is a rollout safety switch: set it to
	// false once the dry-run logs have been validated.
	DryRun bool
}

// LoadConfigFromEnv builds a Config from the GC_ENABLED and GC_DRY_RUN
// environment variables. Each flag defaults to true when the variable is
// unset, empty, or not a recognized boolean spelling.
func LoadConfigFromEnv() Config {
	return Config{
		Enabled: envBool("GC_ENABLED", true),
		DryRun:  envBool("GC_DRY_RUN", true),
	}
}

// envBool reads the environment variable name as a boolean and returns def
// when the value is not recognized. The accepted spellings are the same set
// strconv.ParseBool accepts, so values such as "0" or "FALSE" disable a flag
// instead of being silently treated as true.
func envBool(name string, def bool) bool {
	switch os.Getenv(name) {
	case "1", "t", "T", "true", "TRUE", "True":
		return true
	case "0", "f", "F", "false", "FALSE", "False":
		return false
	default:
		return def
	}
}
+446
pkg/hold/gc/gc.go
···11+package gc
22+33+import (
44+ "bytes"
55+ "context"
66+ "encoding/json"
77+ "fmt"
88+ "io"
99+ "log/slog"
1010+ "net/http"
1111+ "regexp"
1212+ "strings"
1313+ "sync"
1414+ "time"
1515+1616+ "atcr.io/pkg/atproto"
1717+ "atcr.io/pkg/hold/pds"
1818+ "github.com/bluesky-social/indigo/atproto/syntax"
1919+ storagedriver "github.com/distribution/distribution/v3/registry/storage/driver"
2020+)
2121+2222+// GarbageCollector handles cleanup of orphaned blobs from storage
2323+type GarbageCollector struct {
2424+ pds *pds.HoldPDS
2525+ driver storagedriver.StorageDriver
2626+ cfg Config
2727+ logger *slog.Logger
2828+2929+ // stopCh signals the background goroutine to stop
3030+ stopCh chan struct{}
3131+ // wg tracks the background goroutine
3232+ wg sync.WaitGroup
3333+}
// GCResult carries the counters collected during one GC run.
type GCResult struct {
	// Blobs and bytes actually removed from storage (stay zero in dry-run).
	BlobsDeleted   int64 `json:"blobs_deleted"`
	BytesReclaimed int64 `json:"bytes_reclaimed"`
	// Layer records actually removed (stays zero in dry-run).
	RecordsDeleted int64 `json:"records_deleted"`
	// Items identified as orphaned, whether or not they were deleted.
	OrphanedRecords int64 `json:"orphaned_records"`
	OrphanedBlobs   int64 `json:"orphaned_blobs"`
	// Size of the referenced-digest set built from the layer records.
	ReferencedBlobs int64 `json:"referenced_blobs"`
	// Wall-clock time of the run; encoded in JSON as integer nanoseconds.
	Duration time.Duration `json:"duration"`
}
4545+4646+// NewGarbageCollector creates a new GC instance
4747+func NewGarbageCollector(holdPDS *pds.HoldPDS, driver storagedriver.StorageDriver, cfg Config) *GarbageCollector {
4848+ return &GarbageCollector{
4949+ pds: holdPDS,
5050+ driver: driver,
5151+ cfg: cfg,
5252+ logger: slog.Default().With("component", "gc"),
5353+ stopCh: make(chan struct{}),
5454+ }
5555+}
5656+5757+// Start begins the GC background process
5858+// It runs GC immediately on startup, then periodically according to gcInterval
5959+func (gc *GarbageCollector) Start(ctx context.Context) {
6060+ if !gc.cfg.Enabled {
6161+ gc.logger.Info("GC disabled")
6262+ return
6363+ }
6464+6565+ // Run on startup
6666+ gc.logger.Info("Running GC on startup", "dryRun", gc.cfg.DryRun)
6767+ result, err := gc.Run(ctx)
6868+ if err != nil {
6969+ gc.logger.Error("Startup GC failed", "error", err)
7070+ } else {
7171+ gc.logResult(result)
7272+ }
7373+7474+ // Start background ticker for nightly runs
7575+ gc.wg.Add(1)
7676+ go func() {
7777+ defer gc.wg.Done()
7878+7979+ ticker := time.NewTicker(gcInterval)
8080+ defer ticker.Stop()
8181+8282+ for {
8383+ select {
8484+ case <-gc.stopCh:
8585+ gc.logger.Info("GC background process stopped")
8686+ return
8787+ case <-ctx.Done():
8888+ gc.logger.Info("GC context cancelled")
8989+ return
9090+ case <-ticker.C:
9191+ gc.logger.Info("Running nightly GC", "dryRun", gc.cfg.DryRun)
9292+ result, err := gc.Run(ctx)
9393+ if err != nil {
9494+ gc.logger.Error("Nightly GC failed", "error", err)
9595+ } else {
9696+ gc.logResult(result)
9797+ }
9898+ }
9999+ }
100100+ }()
101101+102102+ gc.logger.Info("GC background process started", "interval", gcInterval)
103103+}
104104+105105+// Stop gracefully stops the GC background process
106106+func (gc *GarbageCollector) Stop() {
107107+ close(gc.stopCh)
108108+ gc.wg.Wait()
109109+}
110110+111111+// Run executes a single GC cycle
112112+func (gc *GarbageCollector) Run(ctx context.Context) (*GCResult, error) {
113113+ start := time.Now()
114114+ result := &GCResult{}
115115+116116+ gc.logger.Info("Starting GC run", "dryRun", gc.cfg.DryRun)
117117+118118+ // Phase 1: Build referenced set from layer records
119119+ referenced, orphanedRecords, err := gc.buildReferencedSet(ctx, result)
120120+ if err != nil {
121121+ return nil, fmt.Errorf("phase 1 (build referenced set) failed: %w", err)
122122+ }
123123+124124+ gc.logger.Info("Phase 1 complete",
125125+ "referenced", len(referenced),
126126+ "orphanedRecords", len(orphanedRecords))
127127+128128+ // Phase 2: Delete orphaned layer records
129129+ if err := gc.deleteOrphanedRecords(ctx, orphanedRecords, result); err != nil {
130130+ gc.logger.Error("Phase 2 (delete orphaned records) failed", "error", err)
131131+ // Continue to phase 3 - we can still clean up blobs
132132+ }
133133+134134+ // Phase 3: Walk storage and delete unreferenced blobs
135135+ if err := gc.deleteOrphanedBlobs(ctx, referenced, result); err != nil {
136136+ return nil, fmt.Errorf("phase 3 (delete orphaned blobs) failed: %w", err)
137137+ }
138138+139139+ result.Duration = time.Since(start)
140140+ result.ReferencedBlobs = int64(len(referenced))
141141+142142+ return result, nil
143143+}
144144+145145+// buildReferencedSet iterates layer records and builds a set of referenced digests
146146+// Returns: referenced digest set, list of orphaned record rkeys, error
147147+func (gc *GarbageCollector) buildReferencedSet(ctx context.Context, result *GCResult) (map[string]bool, []string, error) {
148148+ referenced := make(map[string]bool)
149149+ var orphanedRecords []string
150150+151151+ recordsIndex := gc.pds.RecordsIndex()
152152+ if recordsIndex == nil {
153153+ return nil, nil, fmt.Errorf("records index not available")
154154+ }
155155+156156+ cursor := ""
157157+ batchSize := 1000
158158+ totalRecords := 0
159159+160160+ for {
161161+ records, nextCursor, err := recordsIndex.ListRecords(atproto.LayerCollection, batchSize, cursor, true)
162162+ if err != nil {
163163+ return nil, nil, fmt.Errorf("failed to list layer records: %w", err)
164164+ }
165165+166166+ for _, rec := range records {
167167+ totalRecords++
168168+169169+ // Decode the layer record
170170+ layer, err := gc.decodeLayerRecord(ctx, rec)
171171+ if err != nil {
172172+ gc.logger.Warn("Failed to decode layer record", "rkey", rec.Rkey, "error", err)
173173+ continue
174174+ }
175175+176176+ // Grace period: skip records from last 7 days
177177+ recordTime := tidToTime(rec.Rkey)
178178+ if time.Since(recordTime) < gcGracePeriod {
179179+ // Recent record - assume referenced, skip checking
180180+ referenced[layer.Digest] = true
181181+ continue
182182+ }
183183+184184+ // Cross-check: does the manifest still exist?
185185+ if gc.manifestExists(ctx, layer.Manifest) {
186186+ referenced[layer.Digest] = true
187187+ } else {
188188+ result.OrphanedRecords++
189189+ orphanedRecords = append(orphanedRecords, rec.Rkey)
190190+ gc.logger.Debug("Found orphaned layer record",
191191+ "rkey", rec.Rkey,
192192+ "digest", layer.Digest,
193193+ "manifest", layer.Manifest)
194194+ }
195195+ }
196196+197197+ if nextCursor == "" {
198198+ break
199199+ }
200200+ cursor = nextCursor
201201+202202+ // Progress logging
203203+ if totalRecords%10000 == 0 {
204204+ gc.logger.Info("Phase 1 progress", "processed", totalRecords)
205205+ }
206206+ }
207207+208208+ gc.logger.Info("Scanned layer records", "total", totalRecords)
209209+ return referenced, orphanedRecords, nil
210210+}
211211+212212+// deleteOrphanedRecords removes layer records whose manifests no longer exist
213213+func (gc *GarbageCollector) deleteOrphanedRecords(ctx context.Context, orphanedRkeys []string, result *GCResult) error {
214214+ for _, rkey := range orphanedRkeys {
215215+ if gc.cfg.DryRun {
216216+ gc.logger.Info("DRY-RUN: Would delete layer record", "rkey", rkey)
217217+ } else {
218218+ if err := gc.pds.DeleteLayerRecord(ctx, rkey); err != nil {
219219+ gc.logger.Error("Failed to delete layer record", "rkey", rkey, "error", err)
220220+ continue
221221+ }
222222+ result.RecordsDeleted++
223223+ gc.logger.Debug("Deleted orphaned layer record", "rkey", rkey)
224224+ }
225225+ }
226226+227227+ gc.logger.Info("Phase 2 complete",
228228+ "orphaned", len(orphanedRkeys),
229229+ "deleted", result.RecordsDeleted,
230230+ "dryRun", gc.cfg.DryRun)
231231+232232+ return nil
233233+}
234234+235235+// deleteOrphanedBlobs walks storage and deletes blobs not in the referenced set
236236+func (gc *GarbageCollector) deleteOrphanedBlobs(ctx context.Context, referenced map[string]bool, result *GCResult) error {
237237+ blobsPath := "/docker/registry/v2/blobs"
238238+239239+ err := gc.driver.Walk(ctx, blobsPath, func(fi storagedriver.FileInfo) error {
240240+ if fi.IsDir() {
241241+ return nil
242242+ }
243243+244244+ // Only process data files
245245+ if !strings.HasSuffix(fi.Path(), "/data") {
246246+ return nil
247247+ }
248248+249249+ // Extract digest from path
250250+ digest := extractDigestFromPath(fi.Path())
251251+ if digest == "" {
252252+ return nil
253253+ }
254254+255255+ // Check if referenced by any layer record
256256+ if referenced[digest] {
257257+ return nil
258258+ }
259259+260260+ result.OrphanedBlobs++
261261+262262+ if gc.cfg.DryRun {
263263+ gc.logger.Info("DRY-RUN: Would delete blob",
264264+ "digest", digest,
265265+ "size", fi.Size())
266266+ } else {
267267+ if err := gc.driver.Delete(ctx, fi.Path()); err != nil {
268268+ gc.logger.Error("Failed to delete blob", "path", fi.Path(), "error", err)
269269+ return nil // Continue with other blobs
270270+ }
271271+ result.BlobsDeleted++
272272+ result.BytesReclaimed += fi.Size()
273273+ gc.logger.Debug("Deleted orphaned blob",
274274+ "digest", digest,
275275+ "size", fi.Size())
276276+ }
277277+278278+ return nil
279279+ })
280280+281281+ if err != nil {
282282+ return fmt.Errorf("walk storage failed: %w", err)
283283+ }
284284+285285+ gc.logger.Info("Phase 3 complete",
286286+ "orphanedBlobs", result.OrphanedBlobs,
287287+ "deleted", result.BlobsDeleted,
288288+ "reclaimed", result.BytesReclaimed,
289289+ "dryRun", gc.cfg.DryRun)
290290+291291+ return nil
292292+}
293293+294294+// decodeLayerRecord reads and decodes a layer record from the PDS
295295+func (gc *GarbageCollector) decodeLayerRecord(ctx context.Context, rec pds.Record) (*atproto.LayerRecord, error) {
296296+ // Get the record from the repo
297297+ recordPath := rec.Collection + "/" + rec.Rkey
298298+ _, recBytes, err := gc.pds.GetRecordBytes(ctx, recordPath)
299299+ if err != nil {
300300+ return nil, fmt.Errorf("get record bytes: %w", err)
301301+ }
302302+303303+ // Decode the layer record
304304+ var layer atproto.LayerRecord
305305+ if err := layer.UnmarshalCBOR(bytes.NewReader(*recBytes)); err != nil {
306306+ return nil, fmt.Errorf("unmarshal CBOR: %w", err)
307307+ }
308308+309309+ return &layer, nil
310310+}
311311+312312+// manifestExists checks if a manifest still exists at the given AT-URI
313313+func (gc *GarbageCollector) manifestExists(ctx context.Context, manifestURI string) bool {
314314+ // Parse AT-URI: at://did:plc:xxx/io.atcr.manifest/abc123
315315+ parts := parseATURI(manifestURI)
316316+ if parts == nil {
317317+ gc.logger.Debug("Could not parse manifest URI", "uri", manifestURI)
318318+ return false // Can't parse, assume orphaned
319319+ }
320320+321321+ // Check if the manifest record still exists via XRPC
322322+ exists, err := gc.checkManifestViaXRPC(ctx, parts.DID, parts.Collection, parts.Rkey)
323323+ if err != nil {
324324+ // Network error - assume manifest exists (safe default)
325325+ gc.logger.Warn("Failed to check manifest existence, assuming exists",
326326+ "uri", manifestURI,
327327+ "error", err)
328328+ return true
329329+ }
330330+331331+ return exists
332332+}
// atURIParts contains the parsed components of an AT-URI.
type atURIParts struct {
	DID        string
	Collection string
	Rkey       string
}

// parseATURI splits an AT-URI of the form at://did:plc:xxx/collection/rkey
// into its three components. It returns nil unless the URI has the at://
// prefix followed by exactly three non-empty, slash-separated segments, so
// inputs with an empty component or an extra path segment are rejected here
// instead of being sent on as a malformed lookup.
func parseATURI(uri string) *atURIParts {
	rest, ok := strings.CutPrefix(uri, "at://")
	if !ok {
		return nil
	}

	segments := strings.Split(rest, "/")
	if len(segments) != 3 {
		return nil
	}
	for _, segment := range segments {
		if segment == "" {
			return nil
		}
	}

	return &atURIParts{
		DID:        segments[0],
		Collection: segments[1],
		Rkey:       segments[2],
	}
}
363363+364364+// checkManifestViaXRPC checks if a manifest record exists by querying the user's PDS
365365+func (gc *GarbageCollector) checkManifestViaXRPC(ctx context.Context, did, collection, rkey string) (bool, error) {
366366+ // Resolve DID to PDS endpoint
367367+ pdsEndpoint, err := atproto.ResolveDIDToPDS(ctx, did)
368368+ if err != nil {
369369+ return false, fmt.Errorf("resolve PDS: %w", err)
370370+ }
371371+372372+ // Build XRPC URL
373373+ url := fmt.Sprintf("%s/xrpc/com.atproto.repo.getRecord?repo=%s&collection=%s&rkey=%s",
374374+ pdsEndpoint, did, collection, rkey)
375375+376376+ // Make request with timeout
377377+ client := &http.Client{Timeout: 10 * time.Second}
378378+ req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
379379+ if err != nil {
380380+ return false, fmt.Errorf("create request: %w", err)
381381+ }
382382+383383+ resp, err := client.Do(req)
384384+ if err != nil {
385385+ return false, fmt.Errorf("http request: %w", err)
386386+ }
387387+ defer resp.Body.Close()
388388+389389+ // Consume body to allow connection reuse
390390+ _, _ = io.Copy(io.Discard, resp.Body)
391391+392392+ switch resp.StatusCode {
393393+ case http.StatusOK:
394394+ return true, nil
395395+ case http.StatusNotFound, http.StatusBadRequest:
396396+ // Record doesn't exist
397397+ return false, nil
398398+ default:
399399+ // Read error body for debugging
400400+ body, _ := io.ReadAll(resp.Body)
401401+ return false, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
402402+ }
403403+}
404404+405405+// tidToTime extracts the timestamp from a TID (Timestamp ID)
406406+// TIDs are 13-character base32 encoded timestamps with counter
407407+func tidToTime(tid string) time.Time {
408408+ // TIDs are base32-sortable timestamps
409409+ // Use indigo's syntax package for proper parsing
410410+ t, err := syntax.ParseTID(tid)
411411+ if err != nil {
412412+ // Return zero time - will be older than grace period
413413+ return time.Time{}
414414+ }
415415+ return t.Time()
416416+}
// blobDataPathRe matches the tail of a blob data path,
// /blobs/{algorithm}/{xx}/{hash}/data, capturing the algorithm and the hash.
// It is compiled once at package scope because extractDigestFromPath runs
// for every file visited during the storage walk.
var blobDataPathRe = regexp.MustCompile(`/blobs/([^/]+)/[^/]+/([^/]+)/data$`)

// extractDigestFromPath derives a digest from a blob storage path.
// Path format: /docker/registry/v2/blobs/{algorithm}/{xx}/{hash}/data
// Returns "{algorithm}:{hash}", or "" when the path does not have that shape.
func extractDigestFromPath(path string) string {
	matches := blobDataPathRe.FindStringSubmatch(path)
	if matches == nil {
		return ""
	}
	return matches[1] + ":" + matches[2]
}
430430+431431+// logResult logs the GC result in a structured format
432432+func (gc *GarbageCollector) logResult(result *GCResult) {
433433+ gc.logger.Info("GC run complete",
434434+ "duration", result.Duration,
435435+ "referencedBlobs", result.ReferencedBlobs,
436436+ "orphanedRecords", result.OrphanedRecords,
437437+ "recordsDeleted", result.RecordsDeleted,
438438+ "orphanedBlobs", result.OrphanedBlobs,
439439+ "blobsDeleted", result.BlobsDeleted,
440440+ "bytesReclaimed", result.BytesReclaimed,
441441+ "dryRun", gc.cfg.DryRun)
442442+443443+ // Also log as JSON for easier parsing
444444+ resultJSON, _ := json.Marshal(result)
445445+ gc.logger.Debug("GC result JSON", "result", string(resultJSON))
446446+}
+228
pkg/hold/gc/gc_test.go
···11+package gc
22+33+import (
44+ "testing"
55+ "time"
66+)
77+88+func TestExtractDigestFromPath(t *testing.T) {
99+ tests := []struct {
1010+ name string
1111+ path string
1212+ expected string
1313+ }{
1414+ {
1515+ name: "valid sha256 path",
1616+ path: "/docker/registry/v2/blobs/sha256/ab/abc123def456/data",
1717+ expected: "sha256:abc123def456",
1818+ },
1919+ {
2020+ name: "valid sha256 path with full hash",
2121+ path: "/docker/registry/v2/blobs/sha256/e3/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/data",
2222+ expected: "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
2323+ },
2424+ {
2525+ name: "invalid path - no data suffix",
2626+ path: "/docker/registry/v2/blobs/sha256/ab/abc123def456",
2727+ expected: "",
2828+ },
2929+ {
3030+ name: "invalid path - wrong structure",
3131+ path: "/some/other/path/data",
3232+ expected: "",
3333+ },
3434+ {
3535+ name: "empty path",
3636+ path: "",
3737+ expected: "",
3838+ },
3939+ {
4040+ name: "uploads temp path (should not match)",
4141+ path: "/docker/registry/v2/uploads/temp-uuid/data",
4242+ expected: "",
4343+ },
4444+ }
4545+4646+ for _, tt := range tests {
4747+ t.Run(tt.name, func(t *testing.T) {
4848+ result := extractDigestFromPath(tt.path)
4949+ if result != tt.expected {
5050+ t.Errorf("extractDigestFromPath(%q) = %q, want %q", tt.path, result, tt.expected)
5151+ }
5252+ })
5353+ }
5454+}
5555+5656+func TestParseATURI(t *testing.T) {
5757+ tests := []struct {
5858+ name string
5959+ uri string
6060+ expectNil bool
6161+ did string
6262+ collection string
6363+ rkey string
6464+ }{
6565+ {
6666+ name: "valid AT-URI",
6767+ uri: "at://did:plc:abc123/io.atcr.manifest/xyz789",
6868+ expectNil: false,
6969+ did: "did:plc:abc123",
7070+ collection: "io.atcr.manifest",
7171+ rkey: "xyz789",
7272+ },
7373+ {
7474+ name: "valid AT-URI with did:web",
7575+ uri: "at://did:web:example.com/io.atcr.manifest/manifest123",
7676+ expectNil: false,
7777+ did: "did:web:example.com",
7878+ collection: "io.atcr.manifest",
7979+ rkey: "manifest123",
8080+ },
8181+ {
8282+ name: "invalid - no at:// prefix",
8383+ uri: "did:plc:abc123/io.atcr.manifest/xyz789",
8484+ expectNil: true,
8585+ },
8686+ {
8787+ name: "invalid - missing rkey",
8888+ uri: "at://did:plc:abc123/io.atcr.manifest",
8989+ expectNil: true,
9090+ },
9191+ {
9292+ name: "invalid - empty string",
9393+ uri: "",
9494+ expectNil: true,
9595+ },
9696+ {
9797+ name: "invalid - http URL",
9898+ uri: "https://example.com/xrpc/com.atproto.repo.getRecord",
9999+ expectNil: true,
100100+ },
101101+ }
102102+103103+ for _, tt := range tests {
104104+ t.Run(tt.name, func(t *testing.T) {
105105+ result := parseATURI(tt.uri)
106106+ if tt.expectNil {
107107+ if result != nil {
108108+ t.Errorf("parseATURI(%q) = %+v, want nil", tt.uri, result)
109109+ }
110110+ return
111111+ }
112112+113113+ if result == nil {
114114+ t.Errorf("parseATURI(%q) = nil, want non-nil", tt.uri)
115115+ return
116116+ }
117117+118118+ if result.DID != tt.did {
119119+ t.Errorf("parseATURI(%q).DID = %q, want %q", tt.uri, result.DID, tt.did)
120120+ }
121121+ if result.Collection != tt.collection {
122122+ t.Errorf("parseATURI(%q).Collection = %q, want %q", tt.uri, result.Collection, tt.collection)
123123+ }
124124+ if result.Rkey != tt.rkey {
125125+ t.Errorf("parseATURI(%q).Rkey = %q, want %q", tt.uri, result.Rkey, tt.rkey)
126126+ }
127127+ })
128128+ }
129129+}
130130+131131+func TestTidToTime(t *testing.T) {
132132+ // Test with known TID format
133133+ // TIDs are base32-encoded timestamps with counter
134134+ tests := []struct {
135135+ name string
136136+ tid string
137137+ expectZero bool
138138+ minAge time.Duration // Minimum expected age (roughly)
139139+ }{
140140+ {
141141+ name: "valid TID from 2024",
142142+ tid: "3l7nqy25tks2c", // A real TID from around 2024
143143+ expectZero: false,
144144+ },
145145+ {
146146+ name: "invalid TID - too short",
147147+ tid: "abc",
148148+ expectZero: true,
149149+ },
150150+ {
151151+ name: "invalid TID - empty",
152152+ tid: "",
153153+ expectZero: true,
154154+ },
155155+ {
156156+ name: "invalid TID - not base32",
157157+ tid: "!!!!!!!!!!!!!!",
158158+ expectZero: true,
159159+ },
160160+ }
161161+162162+ for _, tt := range tests {
163163+ t.Run(tt.name, func(t *testing.T) {
164164+ result := tidToTime(tt.tid)
165165+ if tt.expectZero {
166166+ if !result.IsZero() {
167167+ t.Errorf("tidToTime(%q) = %v, want zero time", tt.tid, result)
168168+ }
169169+ return
170170+ }
171171+172172+ if result.IsZero() {
173173+ t.Errorf("tidToTime(%q) = zero time, want non-zero", tt.tid)
174174+ }
175175+ })
176176+ }
177177+}
178178+179179+func TestLoadConfigFromEnv(t *testing.T) {
180180+ // Test default values
181181+ t.Run("default values", func(t *testing.T) {
182182+ // Clear any existing env vars
183183+ t.Setenv("GC_ENABLED", "")
184184+ t.Setenv("GC_DRY_RUN", "")
185185+186186+ cfg := LoadConfigFromEnv()
187187+188188+ // Default: enabled
189189+ if !cfg.Enabled {
190190+ t.Error("expected Enabled to be true by default")
191191+ }
192192+193193+ // Default: dry run enabled
194194+ if !cfg.DryRun {
195195+ t.Error("expected DryRun to be true by default")
196196+ }
197197+ })
198198+199199+ t.Run("disabled via env", func(t *testing.T) {
200200+ t.Setenv("GC_ENABLED", "false")
201201+ t.Setenv("GC_DRY_RUN", "false")
202202+203203+ cfg := LoadConfigFromEnv()
204204+205205+ if cfg.Enabled {
206206+ t.Error("expected Enabled to be false when GC_ENABLED=false")
207207+ }
208208+209209+ if cfg.DryRun {
210210+ t.Error("expected DryRun to be false when GC_DRY_RUN=false")
211211+ }
212212+ })
213213+214214+ t.Run("enabled via env", func(t *testing.T) {
215215+ t.Setenv("GC_ENABLED", "true")
216216+ t.Setenv("GC_DRY_RUN", "true")
217217+218218+ cfg := LoadConfigFromEnv()
219219+220220+ if !cfg.Enabled {
221221+ t.Error("expected Enabled to be true when GC_ENABLED=true")
222222+ }
223223+224224+ if !cfg.DryRun {
225225+ t.Error("expected DryRun to be true when GC_DRY_RUN=true")
226226+ }
227227+ })
228228+}
+19
pkg/hold/pds/layer.go
···4949 return nil, fmt.Errorf("GetLayerRecord not yet implemented - use via XRPC listRecords instead")
5050}
51515252+// DeleteLayerRecord deletes a layer record by rkey
5353+// This deletes from both the repo (MST) and the records index
5454+func (p *HoldPDS) DeleteLayerRecord(ctx context.Context, rkey string) error {
5555+ // Delete from repo (MST)
5656+ if err := p.repomgr.DeleteRecord(ctx, p.uid, atproto.LayerCollection, rkey); err != nil {
5757+ return fmt.Errorf("failed to delete from repo: %w", err)
5858+ }
5959+6060+ // Delete from index
6161+ if p.recordsIndex != nil {
6262+ if err := p.recordsIndex.DeleteRecord(atproto.LayerCollection, rkey); err != nil {
6363+ // Log but don't fail - index will resync on backfill
6464+ fmt.Printf("Warning: failed to delete from records index: %v\n", err)
6565+ }
6666+ }
6767+6868+ return nil
6969+}
7070+5271// ListLayerRecords lists layer records with pagination
5372// Returns records, next cursor (empty if no more), and error
5473// Note: This is a simplified implementation. For production, consider adding filters
+30
pkg/hold/pds/server.go
···152152 return p.uid
153153}
154154155155+// GetRecordBytes retrieves raw CBOR bytes for a record
156156+// recordPath format: "collection/rkey"
157157+func (p *HoldPDS) GetRecordBytes(ctx context.Context, recordPath string) (cid.Cid, *[]byte, error) {
158158+ session, err := p.carstore.ReadOnlySession(p.uid)
159159+ if err != nil {
160160+ return cid.Undef, nil, fmt.Errorf("failed to create session: %w", err)
161161+ }
162162+163163+ head, err := p.carstore.GetUserRepoHead(ctx, p.uid)
164164+ if err != nil {
165165+ return cid.Undef, nil, fmt.Errorf("failed to get repo head: %w", err)
166166+ }
167167+168168+ if !head.Defined() {
169169+ return cid.Undef, nil, fmt.Errorf("repo is empty")
170170+ }
171171+172172+ repoHandle, err := repo.OpenRepo(ctx, session, head)
173173+ if err != nil {
174174+ return cid.Undef, nil, fmt.Errorf("failed to open repo: %w", err)
175175+ }
176176+177177+ recordCID, recBytes, err := repoHandle.GetRecordBytes(ctx, recordPath)
178178+ if err != nil {
179179+ return cid.Undef, nil, fmt.Errorf("failed to get record: %w", err)
180180+ }
181181+182182+ return recordCID, recBytes, nil
183183+}
184184+155185// Bootstrap initializes the hold with the captain record, owner as first crew member, and profile
156186func (p *HoldPDS) Bootstrap(ctx context.Context, storageDriver driver.StorageDriver, ownerDID string, public bool, allowAllCrew bool, avatarURL, region string) error {
157187 if ownerDID == "" {