A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go
81
fork

Configure Feed

Select the types of activity you want to include in your feed.

holds now listen for deletes and labelers for takedowns. GC will defer takedowns for a grace period in case of reversal

+2145 -30
+1 -1
.air.hold.toml
··· 6 6 cmd = "go build -buildvcs=false -o ./tmp/atcr-hold ./cmd/hold" 7 7 entrypoint = ["./tmp/atcr-hold", "serve", "--config", "config-hold.example.yaml"] 8 8 include_ext = ["go", "html", "css", "js"] 9 - exclude_dir = ["bin", "tmp", "vendor", "deploy", "docs", ".git", "dist", "pkg/appview", "node_modules"] 9 + exclude_dir = ["bin", "tmp", "vendor", "deploy", "docs", ".git", "dist", "node_modules", "scanner", "pkg/appview", "pkg/labeler"] 10 10 exclude_regex = ["_test\\.go$", "cbor_gen\\.go$", "\\.min\\.js$", "public/css/style\\.css$", "public/icons\\.svg$"] 11 11 delay = 3000 12 12 stop_on_error = true
+1 -1
.air.labeler.toml
··· 5 5 cmd = "go build -buildvcs=false -o ./tmp/atcr-labeler ./cmd/labeler" 6 6 entrypoint = ["./tmp/atcr-labeler", "serve", "--config", "config-labeler.example.yaml"] 7 7 include_ext = ["go", "html", "css", "js"] 8 - exclude_dir = ["bin", "tmp", "vendor", "deploy", "docs", ".git", "dist", "pkg/appview", "pkg/hold", "node_modules"] 8 + exclude_dir = ["bin", "tmp", "vendor", "deploy", "docs", ".git", "dist", "node_modules", "scanner", "pkg/appview", "pkg/hold"] 9 9 exclude_regex = ["_test\\.go$", "cbor_gen\\.go$", "\\.min\\.js$", "public/css/style\\.css$", "public/icons\\.svg$"] 10 10 delay = 3000 11 11 stop_on_error = true
+1 -1
.air.toml
··· 10 10 cmd = "go build -tags billing -buildvcs=false -o ./tmp/atcr-appview ./cmd/appview" 11 11 entrypoint = ["./tmp/atcr-appview", "serve", "--config", "config-appview.example.yaml"] 12 12 include_ext = ["go", "html", "css", "js"] 13 - exclude_dir = ["bin", "tmp", "vendor", "deploy", "docs", ".git", "dist", "node_modules", "pkg/hold"] 13 + exclude_dir = ["bin", "tmp", "vendor", "deploy", "docs", ".git", "dist", "node_modules", "scanner", "pkg/hold", "pkg/labeler"] 14 14 exclude_regex = ["_test\\.go$", "cbor_gen\\.go$", "\\.min\\.js$", "public/css/style\\.css$", "public/icons\\.svg$"] 15 15 delay = 3000 16 16 stop_on_error = true
+8
config-appview.example.yaml
··· 82 82 key_path: /var/lib/atcr/auth/private-key.pem 83 83 # X.509 certificate matching the JWT signing key. 84 84 cert_path: /var/lib/atcr/auth/private-key.crt 85 + # Credential helper download settings. 86 + credential_helper: 87 + # Tangled repository URL for credential helper downloads. 88 + tangled_repo: "" 85 89 # Legal page customization for self-hosted instances. 86 90 legal: 87 91 # Organization name for Terms of Service and Privacy Policy. Defaults to server.client_name. ··· 92 96 ai: 93 97 # Anthropic API key for AI Image Advisor. Also reads CLAUDE_API_KEY env var as fallback. 94 98 api_key: "" 99 + # ATProto labeler for content moderation (DMCA takedowns). 100 + labeler: 101 + # DID or URL of the ATProto labeler (e.g., did:web:labeler.atcr.io). Empty disables label filtering. 102 + did: "" 95 103 # Stripe billing integration (requires -tags billing build). 96 104 billing: 97 105 # Stripe secret key. Can also be set via STRIPE_SECRET_KEY env var (takes precedence). Billing is enabled automatically when set.
+6
config-hold.example.yaml
··· 135 135 secret: "" 136 136 # Minimum interval between re-scans of the same manifest. When set, the hold proactively scans manifests when the scanner is idle. Default: 168h (7 days). Set to 0 to disable. 137 137 rescan_interval: 168h0m0s 138 + # Labeler subscription settings. When configured, the hold consumes takedown labels from the named labeler and purges affected records on receipt; GC consults the cache to gate blob cleanup. Empty did disables. 139 + labeler: 140 + # DID or URL of the ATProto labeler (e.g., did:web:labeler.atcr.io). Empty disables labeler integration. 141 + did: "" 142 + # Reversibility window for takedowns. Blobs survive this long after a takedown so the action can be reversed. After this window the GC reclaims them. Default: 720h (30 days). 143 + grace_window: 720h0m0s
+5
deploy/upcloud/configs/hold.yaml.tmpl
··· 61 61 scanner: 62 62 secret: "{{.ScannerSecret}}" 63 63 rescan_interval: 168h0m0s 64 + labeler: 65 + # Subscribe to the appview's labeler so takedowns purge records on this 66 + # hold and the GC honors the reversibility window. Empty disables. 67 + did: "did:web:seamark.dev" 68 + grace_window: 720h0m0s 64 69
+7
docker-compose.yml
··· 65 65 HOLD_REGISTRATION_ALLOW_ALL_CREW: true 66 66 HOLD_SERVER_TEST_MODE: true 67 67 HOLD_LOG_LEVEL: debug 68 + # Subscribe to the dev labeler so takedowns purge records on this hold and 69 + # GC honors the reversibility window. Same value the appview uses for 70 + # ATCR_LABELER_DID — accepts a did:web identifier or a raw URL. 71 + HOLD_LABELER_DID: http://172.28.0.4:5002 72 + # Short grace window for dev so the takedown→GC path is exercisable without 73 + # waiting weeks. Production default is 720h (30 days). 74 + HOLD_LABELER_GRACE_WINDOW: 1h 68 75 LOG_SHIPPER_BACKEND: victoria 69 76 LOG_SHIPPER_URL: http://172.28.0.10:9428 70 77 # S3 storage config comes from env_file (AWS_*, S3_*)
+2
docs/HOLD_XRPC_ENDPOINTS.md
··· 37 37 |----------|--------|-------------| 38 38 | `/xrpc/com.atproto.repo.deleteRecord` | POST | Delete a record | 39 39 | `/xrpc/com.atproto.repo.uploadBlob` | POST | Upload ATProto blob | 40 + | `/xrpc/io.atcr.hold.purgeManifest` | POST | Purge layer/scan/image-config records for a manifest (eager delete + takedown). Idempotent. | 40 41 41 42 ### Auth Required (Service Token or DPoP) 42 43 ··· 82 83 | `/xrpc/io.atcr.hold.getQuota` | GET | none | Get user quota info | 83 84 | `/xrpc/io.atcr.hold.getLayersForManifest` | GET | none | Get layer records for a manifest AT-URI | 84 85 | `/xrpc/io.atcr.hold.image.getConfig` | GET | none | Get OCI image config record for a manifest digest | 86 + | `/xrpc/io.atcr.hold.purgeManifest` | POST | owner/crew admin | Purge layer/scan/image-config records for a single manifest URI. Called by appview on UI delete; called internally on takedown receipt. Does not delete S3 blobs (GC handles those). | 85 87 | `/xrpc/io.atcr.hold.listTiers` | GET | none | List hold's available tiers with quotas and features (scanOnPush) | 86 88 | `/xrpc/io.atcr.hold.updateCrewTier` | POST | appview token | Update crew member's tier | 87 89
+54
lexicons/io/atcr/hold/purgeManifest.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "io.atcr.hold.purgeManifest", 4 + "defs": { 5 + "main": { 6 + "type": "procedure", 7 + "description": "Purge layer, scan, and image-config records associated with a manifest. Used by the appview when a user deletes a manifest, and by the hold's own labeler subscriber on takedown receipt. Idempotent: missing records are not errors. Does not delete S3 blobs (GC handles that based on remaining references).", 8 + "input": { 9 + "encoding": "application/json", 10 + "schema": { 11 + "type": "object", 12 + "required": ["manifestUri"], 13 + "properties": { 14 + "manifestUri": { 15 + "type": "string", 16 + "format": "at-uri", 17 + "description": "AT-URI of the manifest record, e.g. at://did:plc:xyz/io.atcr.manifest/<digest>" 18 + } 19 + } 20 + } 21 + }, 22 + "output": { 23 + "encoding": "application/json", 24 + "schema": { 25 + "type": "object", 26 + "required": ["success", "layersDeleted", "scanDeleted", "imageConfigDeleted"], 27 + "properties": { 28 + "success": { 29 + "type": "boolean", 30 + "description": "Whether the purge completed successfully" 31 + }, 32 + "layersDeleted": { 33 + "type": "integer", 34 + "description": "Number of layer records deleted" 35 + }, 36 + "scanDeleted": { 37 + "type": "boolean", 38 + "description": "Whether a scan record was deleted" 39 + }, 40 + "imageConfigDeleted": { 41 + "type": "boolean", 42 + "description": "Whether an image config record was deleted" 43 + } 44 + } 45 + } 46 + }, 47 + "errors": [ 48 + { "name": "AuthRequired" }, 49 + { "name": "InvalidRequest" }, 50 + { "name": "PurgeFailed" } 51 + ] 52 + } 53 + } 54 + }
+12 -5
pkg/appview/db/labels.go
··· 22 22 } 23 23 24 24 // Label represents an ATProto label mirrored from a labeler service. 25 + // Exp is the optional expiration timestamp from the ATProto label spec; 26 + // nil means the label does not expire. 25 27 type Label struct { 26 28 ID int64 27 29 Src string ··· 29 31 Val string 30 32 Neg bool 31 33 Cts time.Time 34 + Exp *time.Time 32 35 SubjectDID string 33 36 SubjectRepo string 34 37 Seq int64 ··· 49 52 WHERE l2.src = l1.src AND l2.uri = l1.uri AND l2.val = l1.val 50 53 AND l2.neg = 1 AND l2.id > l1.id 51 54 ) 52 - AND (l1.exp IS NULL OR l1.exp > CURRENT_TIMESTAMP) 55 + AND (l1.exp IS NULL OR datetime(l1.exp) > CURRENT_TIMESTAMP) 53 56 )`, 54 57 did, repository, 55 58 ).Scan(&exists) ··· 58 61 59 62 // UpsertLabel inserts or updates a label from a labeler subscription. 60 63 func UpsertLabel(db DBTX, l *Label) error { 64 + var exp any 65 + if l.Exp != nil { 66 + exp = l.Exp.UTC().Format(time.RFC3339) 67 + } 61 68 _, err := db.Exec( 62 - `INSERT INTO labels (src, uri, val, neg, cts, subject_did, subject_repo, seq) 63 - VALUES (?, ?, ?, ?, ?, ?, ?, ?) 64 - ON CONFLICT(src, uri, val, neg) DO UPDATE SET cts = excluded.cts, seq = excluded.seq`, 65 - l.Src, l.URI, l.Val, l.Neg, l.Cts.UTC().Format(time.RFC3339), 69 + `INSERT INTO labels (src, uri, val, neg, cts, exp, subject_did, subject_repo, seq) 70 + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 71 + ON CONFLICT(src, uri, val, neg) DO UPDATE SET cts = excluded.cts, exp = excluded.exp, seq = excluded.seq`, 72 + l.Src, l.URI, l.Val, l.Neg, l.Cts.UTC().Format(time.RFC3339), exp, 66 73 l.SubjectDID, l.SubjectRepo, l.Seq, 67 74 ) 68 75 return err
+138
pkg/appview/db/labels_test.go
··· 1 + package db 2 + 3 + import ( 4 + "testing" 5 + "time" 6 + ) 7 + 8 + // TestIsTakenDown_ExpRespected verifies that the takedown check honors the 9 + // optional ATProto label expiration: NULL and future-dated exp values count 10 + // the label as active, while past-dated exp values exclude it. 11 + func TestIsTakenDown_ExpRespected(t *testing.T) { 12 + db, err := InitDB("file:TestIsTakenDown_ExpRespected?mode=memory&cache=shared", LibsqlConfig{}) 13 + if err != nil { 14 + t.Fatalf("init db: %v", err) 15 + } 16 + defer db.Close() 17 + 18 + now := time.Now().UTC() 19 + past := now.Add(-1 * time.Hour) 20 + future := now.Add(1 * time.Hour) 21 + 22 + cases := []struct { 23 + name string 24 + did string 25 + exp *time.Time 26 + wantHit bool 27 + }{ 28 + {"no_exp_active", "did:plc:noexp", nil, true}, 29 + {"future_exp_active", "did:plc:future", &future, true}, 30 + {"past_exp_inactive", "did:plc:past", &past, false}, 31 + } 32 + 33 + for _, tc := range cases { 34 + label := &Label{ 35 + Src: "did:plc:labeler", 36 + URI: "at://" + tc.did + "/io.atcr.repo.page/foo", 37 + Val: "!takedown", 38 + Neg: false, 39 + Cts: now, 40 + Exp: tc.exp, 41 + SubjectDID: tc.did, 42 + Seq: 1, 43 + } 44 + if err := UpsertLabel(db, label); err != nil { 45 + t.Fatalf("%s: upsert label: %v", tc.name, err) 46 + } 47 + 48 + got, err := IsTakenDown(db, tc.did, "foo") 49 + if err != nil { 50 + t.Fatalf("%s: IsTakenDown: %v", tc.name, err) 51 + } 52 + if got != tc.wantHit { 53 + t.Errorf("%s: IsTakenDown = %v, want %v", tc.name, got, tc.wantHit) 54 + } 55 + } 56 + } 57 + 58 + // TestIsTakenDown_NegationWinsOverExp verifies that a later negation row 59 + // suppresses an earlier non-expired takedown — exp doesn't shield it from 60 + // being reversed by a !takedown neg=1 with a higher id. 
61 + func TestIsTakenDown_NegationWinsOverExp(t *testing.T) { 62 + db, err := InitDB("file:TestIsTakenDown_NegationWinsOverExp?mode=memory&cache=shared", LibsqlConfig{}) 63 + if err != nil { 64 + t.Fatalf("init db: %v", err) 65 + } 66 + defer db.Close() 67 + 68 + did := "did:plc:reversed" 69 + uri := "at://" + did + "/io.atcr.repo.page/bar" 70 + src := "did:plc:labeler" 71 + now := time.Now().UTC() 72 + future := now.Add(24 * time.Hour) 73 + 74 + if err := UpsertLabel(db, &Label{ 75 + Src: src, URI: uri, Val: "!takedown", Neg: false, 76 + Cts: now, Exp: &future, SubjectDID: did, Seq: 1, 77 + }); err != nil { 78 + t.Fatalf("seed takedown: %v", err) 79 + } 80 + if err := UpsertLabel(db, &Label{ 81 + Src: src, URI: uri, Val: "!takedown", Neg: true, 82 + Cts: now.Add(time.Minute), SubjectDID: did, Seq: 2, 83 + }); err != nil { 84 + t.Fatalf("seed reversal: %v", err) 85 + } 86 + 87 + got, err := IsTakenDown(db, did, "bar") 88 + if err != nil { 89 + t.Fatalf("IsTakenDown: %v", err) 90 + } 91 + if got { 92 + t.Errorf("IsTakenDown = true, want false (reversal should suppress non-expired takedown)") 93 + } 94 + } 95 + 96 + // TestUpsertLabel_ExpUpdatedOnConflict verifies that upserting an existing 97 + // label row updates the exp column (not just cts/seq) — so a labeler that 98 + // extends or removes an expiration is reflected. 99 + func TestUpsertLabel_ExpUpdatedOnConflict(t *testing.T) { 100 + db, err := InitDB("file:TestUpsertLabel_ExpUpdatedOnConflict?mode=memory&cache=shared", LibsqlConfig{}) 101 + if err != nil { 102 + t.Fatalf("init db: %v", err) 103 + } 104 + defer db.Close() 105 + 106 + did := "did:plc:updated" 107 + src := "did:plc:labeler" 108 + uri := "at://" + did + "/io.atcr.repo.page/baz" 109 + now := time.Now().UTC() 110 + past := now.Add(-1 * time.Hour) 111 + future := now.Add(1 * time.Hour) 112 + 113 + // Seed with an already-expired label — IsTakenDown should be false. 
114 + if err := UpsertLabel(db, &Label{ 115 + Src: src, URI: uri, Val: "!takedown", Neg: false, 116 + Cts: now, Exp: &past, SubjectDID: did, Seq: 1, 117 + }); err != nil { 118 + t.Fatalf("seed: %v", err) 119 + } 120 + if got, _ := IsTakenDown(db, did, "baz"); got { 121 + t.Fatalf("expected expired label to be inactive before update") 122 + } 123 + 124 + // Re-upsert with a future exp — same UNIQUE key, should update exp. 125 + if err := UpsertLabel(db, &Label{ 126 + Src: src, URI: uri, Val: "!takedown", Neg: false, 127 + Cts: now.Add(time.Minute), Exp: &future, SubjectDID: did, Seq: 2, 128 + }); err != nil { 129 + t.Fatalf("re-upsert: %v", err) 130 + } 131 + got, err := IsTakenDown(db, did, "baz") 132 + if err != nil { 133 + t.Fatalf("IsTakenDown after update: %v", err) 134 + } 135 + if !got { 136 + t.Errorf("IsTakenDown = false, want true after extending exp into the future") 137 + } 138 + }
+3
pkg/appview/db/migrations/0024_add_exp_to_labels.yaml
··· 1 + description: Add optional exp (expiration) column to labels for ATProto label spec 2 + query: | 3 + ALTER TABLE labels ADD COLUMN exp TIMESTAMP;
+1 -1
pkg/appview/db/queries.go
··· 37 37 WHERE l2.src = l1.src AND l2.uri = l1.uri AND l2.val = l1.val 38 38 AND l2.neg = 1 AND l2.id > l1.id 39 39 ) 40 - AND (l1.exp IS NULL OR l1.exp > CURRENT_TIMESTAMP) 40 + AND (l1.exp IS NULL OR datetime(l1.exp) > CURRENT_TIMESTAMP) 41 41 )` 42 42 } 43 43
+1
pkg/appview/db/schema.sql
··· 306 306 val TEXT NOT NULL, 307 307 neg BOOLEAN NOT NULL DEFAULT 0, 308 308 cts TIMESTAMP NOT NULL, 309 + exp TIMESTAMP, 309 310 subject_did TEXT NOT NULL, 310 311 subject_repo TEXT NOT NULL DEFAULT '', 311 312 seq INTEGER NOT NULL DEFAULT 0,
+118
pkg/appview/handlers/hold_purge.go
··· 1 + package handlers 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "encoding/json" 7 + "io" 8 + "log/slog" 9 + "net/http" 10 + "time" 11 + 12 + "atcr.io/pkg/atproto" 13 + "atcr.io/pkg/auth" 14 + "atcr.io/pkg/auth/oauth" 15 + ) 16 + 17 + // purgeManifestRequest is the JSON body sent to io.atcr.hold.purgeManifest. 18 + type purgeManifestRequest struct { 19 + ManifestURI string `json:"manifestUri"` 20 + } 21 + 22 + // purgeOnHold tells the hold to delete the layer, scan, and image-config 23 + // records associated with a single manifest. This is best-effort: callers 24 + // should treat all errors as "log and continue" because lazy GC on the hold 25 + // will catch up either way (and on third-party holds the user may not even 26 + // have the captain/crew-admin permission needed for the call to succeed). 27 + // 28 + // holdDID identifies which hold owns the manifest's blobs (typically the 29 + // `hold_endpoint` column on the manifests row, or a freshly-resolved value 30 + // from the manifest record). userDID + pdsEndpoint are the OAuth-acting 31 + // user — the service token is minted from their PDS with audience = holdDID. 
32 + func purgeOnHold(ctx context.Context, refresher *oauth.Refresher, userDID, pdsEndpoint, holdDID, manifestURI string) { 33 + if holdDID == "" || manifestURI == "" { 34 + return 35 + } 36 + if refresher == nil { 37 + slog.Debug("purgeOnHold: OAuth refresher unavailable; skipping", 38 + "hold_did", holdDID, "manifest", manifestURI) 39 + return 40 + } 41 + 42 + timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Second) 43 + defer cancel() 44 + 45 + holdURL, err := atproto.ResolveHoldURL(timeoutCtx, holdDID) 46 + if err != nil { 47 + slog.Warn("purgeOnHold: failed to resolve hold URL", 48 + "hold_did", holdDID, "error", err) 49 + return 50 + } 51 + 52 + serviceToken, err := auth.GetOrFetchServiceToken(timeoutCtx, refresher, userDID, holdDID, pdsEndpoint) 53 + if err != nil { 54 + slog.Warn("purgeOnHold: failed to mint service token", 55 + "hold_did", holdDID, "user_did", userDID, "error", err) 56 + return 57 + } 58 + 59 + body, err := json.Marshal(purgeManifestRequest{ManifestURI: manifestURI}) 60 + if err != nil { 61 + slog.Warn("purgeOnHold: failed to marshal request", 62 + "hold_did", holdDID, "error", err) 63 + return 64 + } 65 + 66 + req, err := http.NewRequestWithContext(timeoutCtx, http.MethodPost, 67 + holdURL+atproto.HoldPurgeManifest, bytes.NewReader(body)) 68 + if err != nil { 69 + slog.Warn("purgeOnHold: failed to create request", 70 + "hold_did", holdDID, "error", err) 71 + return 72 + } 73 + req.Header.Set("Authorization", "Bearer "+serviceToken) 74 + req.Header.Set("Content-Type", "application/json") 75 + 76 + resp, err := http.DefaultClient.Do(req) 77 + if err != nil { 78 + slog.Warn("purgeOnHold: request failed", 79 + "hold_did", holdDID, "manifest", manifestURI, "error", err) 80 + return 81 + } 82 + defer resp.Body.Close() 83 + 84 + if resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusUnauthorized { 85 + // Sailor pushing to a third-party hold won't have captain/crew-admin 86 + // rights; that's expected. 
Lazy GC on that hold will reclaim later. 87 + slog.Debug("purgeOnHold: not authorized on hold (lazy GC will handle)", 88 + "hold_did", holdDID, "manifest", manifestURI, "status", resp.StatusCode) 89 + return 90 + } 91 + if resp.StatusCode != http.StatusOK { 92 + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) 93 + slog.Warn("purgeOnHold: hold returned non-OK status", 94 + "hold_did", holdDID, "manifest", manifestURI, 95 + "status", resp.StatusCode, "body", string(body)) 96 + return 97 + } 98 + 99 + var out struct { 100 + Success bool `json:"success"` 101 + LayersDeleted int `json:"layersDeleted"` 102 + ScanDeleted bool `json:"scanDeleted"` 103 + ImageConfigDeleted bool `json:"imageConfigDeleted"` 104 + } 105 + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { 106 + slog.Warn("purgeOnHold: failed to parse response", 107 + "hold_did", holdDID, "manifest", manifestURI, "error", err) 108 + return 109 + } 110 + 111 + slog.Info("purgeOnHold: purge succeeded", 112 + "hold_did", holdDID, 113 + "manifest", manifestURI, 114 + "layers_deleted", out.LayersDeleted, 115 + "scan_deleted", out.ScanDeleted, 116 + "image_config_deleted", out.ImageConfigDeleted, 117 + ) 118 + }
+25
pkg/appview/handlers/images.go
··· 156 156 } 157 157 } 158 158 159 + // Read the appview's cached manifest row before deleting it so we know 160 + // which hold owned the blobs. Best-effort — if not cached, the manifest 161 + // record from the PDS would also have it but we don't pre-fetch the PDS 162 + // record just for this. Without a hold DID we just skip the eager purge 163 + // and fall back to lazy GC on the hold. 164 + var holdDID string 165 + if cached, err := db.GetManifestDetail(h.ReadOnlyDB, user.DID, repo, digest); err == nil && cached != nil { 166 + holdDID = cached.HoldEndpoint 167 + } 168 + 159 169 // Compute rkey for manifest record (digest without "sha256:" prefix) 160 170 rkey := strings.TrimPrefix(digest, "sha256:") 161 171 ··· 176 186 return 177 187 } 178 188 189 + // Tell the hold to drop its layer/scan/image-config records for this 190 + // manifest. Best-effort — failures here only mean the hold's lazy GC 191 + // will clean up later, so we don't reflect the failure to the user. 192 + manifestURI := atproto.BuildManifestURI(user.DID, digest) 193 + purgeOnHold(r.Context(), h.Refresher, user.DID, user.PDSEndpoint, holdDID, manifestURI) 194 + 179 195 w.WriteHeader(http.StatusOK) 180 196 } 181 197 ··· 233 249 for _, digest := range digests { 234 250 rkey := strings.TrimPrefix(digest, "sha256:") 235 251 252 + // Snapshot hold ownership before the delete so we can purge after. 
253 + var holdDID string 254 + if cached, err := db.GetManifestDetail(h.ReadOnlyDB, user.DID, req.Repo, digest); err == nil && cached != nil { 255 + holdDID = cached.HoldEndpoint 256 + } 257 + 236 258 if err := pdsClient.DeleteRecord(r.Context(), atproto.ManifestCollection, rkey); err != nil { 237 259 if handleOAuthError(r.Context(), h.Refresher, user.DID, err) { 238 260 render.Status(r, http.StatusUnauthorized) ··· 252 274 failures = append(failures, failure{Digest: digest, Error: fmt.Sprintf("cache: %v", err)}) 253 275 continue 254 276 } 277 + 278 + manifestURI := atproto.BuildManifestURI(user.DID, digest) 279 + purgeOnHold(r.Context(), h.Refresher, user.DID, user.PDSEndpoint, holdDID, manifestURI) 255 280 256 281 deleted++ 257 282 }
+10
pkg/appview/labeler/subscriber.go
··· 129 129 cts, _ := time.Parse(time.RFC3339, le.Cts) 130 130 did, repo := extractSubjectFromURI(le.Uri) 131 131 132 + // Exp is optional in the ATProto label spec — treat unparseable 133 + // values as "no expiration" rather than dropping the label. 134 + var exp *time.Time 135 + if le.Exp != nil { 136 + if t, err := time.Parse(time.RFC3339, *le.Exp); err == nil { 137 + exp = &t 138 + } 139 + } 140 + 132 141 label := &db.Label{ 133 142 Src: le.Src, 134 143 URI: le.Uri, 135 144 Val: le.Val, 136 145 Neg: le.Neg != nil && *le.Neg, 137 146 Cts: cts, 147 + Exp: exp, 138 148 SubjectDID: did, 139 149 SubjectRepo: repo, 140 150 Seq: seq,
+8
pkg/atproto/endpoints.go
··· 73 73 // Method: DELETE 74 74 // Response: {"success": true, "crew_deleted": bool, "layers_deleted": int, "stats_deleted": int} 75 75 HoldDeleteUserData = "/xrpc/io.atcr.hold.deleteUserData" 76 + 77 + // HoldPurgeManifest purges layer, scan, and image-config records for a single 78 + // manifest. Called by the appview on UI manifest delete and by the hold's own 79 + // labeler subscriber on takedown. Idempotent. 80 + // Method: POST 81 + // Request: {"manifestUri": "at://did:.../io.atcr.manifest/<digest>"} 82 + // Response: {"success": true, "layersDeleted": N, "scanDeleted": bool, "imageConfigDeleted": bool} 83 + HoldPurgeManifest = "/xrpc/io.atcr.hold.purgeManifest" 76 84 ) 77 85 78 86 // Hold service crew management endpoints (io.atcr.hold.*)
+28
pkg/hold/config.go
··· 52 52 GC gc.Config `yaml:"gc" comment:"Garbage collection settings."` 53 53 Quota quota.Config `yaml:"quota" comment:"Storage quota tiers. Empty disables quota enforcement."` 54 54 Scanner ScannerConfig `yaml:"scanner" comment:"Vulnerability scanner settings. Empty disables scanning."` 55 + Labeler LabelerConfig `yaml:"labeler" comment:"Labeler subscription settings. When configured, the hold consumes takedown labels from the named labeler and purges affected records on receipt; GC consults the cache to gate blob cleanup. Empty subscribe_url disables."` 55 56 configPath string `yaml:"-"` // internal: path to YAML file for subsystem config loading 56 57 } 57 58 ··· 180 181 return URLFromDIDWeb(s.AppviewDID) 181 182 } 182 183 184 + // LabelerConfig defines labeler subscription settings. 185 + // 186 + // When DID is set, the hold opens a websocket to the labeler's 187 + // com.atproto.label.subscribeLabels endpoint and only honors labels whose 188 + // Src matches the same DID. Active takedowns are cached locally with their 189 + // Cts timestamp; on receipt of a !takedown label the hold immediately purges 190 + // layer/scan/image-config records for the labeled manifest (or all manifests 191 + // by the labeled DID for user-level takedowns). Negations drop the cache 192 + // entry. The GC consults the cache when computing referenced sets so blobs 193 + // survive a configurable grace window before being collected, preserving 194 + // reversibility. 195 + type LabelerConfig struct { 196 + // DID or URL of the labeler service. Accepts did:web:... (resolved to 197 + // the corresponding HTTPS host) or a raw http/https URL. Empty disables 198 + // labeler integration. 199 + DID string `yaml:"did" comment:"DID or URL of the ATProto labeler (e.g., did:web:labeler.atcr.io). Empty disables labeler integration."` 200 + 201 + // Grace window for reversibility. 
Until a takedown is older than this, 202 + // the GC keeps blobs referenced even though their layer records were 203 + // purged. After this window blobs become eligible for collection. 204 + GraceWindow time.Duration `yaml:"grace_window" comment:"Reversibility window for takedowns. Blobs survive this long after a takedown so the action can be reversed. After this window the GC reclaims them. Default: 720h (30 days)."` 205 + } 206 + 183 207 // ScannerConfig defines vulnerability scanner settings 184 208 type ScannerConfig struct { 185 209 // Shared secret for scanner WebSocket authentication. Empty disables scanning. ··· 270 294 // Scanner defaults 271 295 v.SetDefault("scanner.secret", "") 272 296 v.SetDefault("scanner.rescan_interval", "168h") // 7 days 297 + 298 + // Labeler defaults 299 + v.SetDefault("labeler.did", "") 300 + v.SetDefault("labeler.grace_window", "720h") // 30 days 273 301 274 302 // Log shipper defaults 275 303 v.SetDefault("log_shipper.batch_size", 100)
+85 -3
pkg/hold/gc/gc.go
··· 83 83 Duration time.Duration `json:"duration"` 84 84 } 85 85 86 + // TakedownGate is the interface the GC uses to consult the labeler cache when 87 + // computing reachability. Defined here (rather than imported) to keep the GC 88 + // package free of any direct dependency on the labeler package. 89 + // 90 + // IsTakenDown returns the takedown's creation timestamp and a boolean 91 + // indicating whether the manifest URI is currently under takedown (either by 92 + // per-manifest label or via a user-level label on its DID). 93 + type TakedownGate interface { 94 + IsTakenDown(manifestURI string) (cts time.Time, ok bool) 95 + } 96 + 97 + // Option configures optional GC behavior. 98 + type Option func(*GarbageCollector) 99 + 100 + // WithTakedownCache wires a takedown gate (typically the hold's labeler cache) 101 + // and a grace window. When set, analyzeRecords protects blobs of taken-down 102 + // manifests from collection until grace expires, and skips reconciliation of 103 + // their layer records so the labeler-driven purge isn't undone. 104 + // 105 + // Passing a nil gate leaves the GC behaving as before. A non-nil gate with a non-positive window treats every takedown as already past grace: taken-down manifests are skipped and their blobs are not protected. 106 + func WithTakedownCache(gate TakedownGate, graceWindow time.Duration) Option { 107 + return func(gc *GarbageCollector) { 108 + gc.takedownGate = gate 109 + gc.takedownGrace = graceWindow 110 + } 111 + } 112 + 86 113 // GarbageCollector handles cleanup of orphaned blobs from storage 87 114 type GarbageCollector struct { 88 115 pds *pds.HoldPDS 89 116 s3 *s3.S3Service 90 117 cfg Config 91 118 logger *slog.Logger 119 + 120 + // takedownGate, if non-nil, is consulted in analyzeRecords to gate blob 121 + // reachability and skip reconcile for taken-down manifests. 
122 + takedownGate TakedownGate 123 + takedownGrace time.Duration 92 124 93 125 // stopCh signals the background goroutine to stop 94 126 stopCh chan struct{} ··· 151 183 totalRecords int 152 184 } 153 185 154 - // NewGarbageCollector creates a new GC instance 155 - func NewGarbageCollector(holdPDS *pds.HoldPDS, s3svc *s3.S3Service, cfg Config) *GarbageCollector { 156 - return &GarbageCollector{ 186 + // NewGarbageCollector creates a new GC instance. Optional behavior (such as 187 + // the labeler-aware takedown gate) is configured via Option arguments. 188 + func NewGarbageCollector(holdPDS *pds.HoldPDS, s3svc *s3.S3Service, cfg Config, opts ...Option) *GarbageCollector { 189 + gc := &GarbageCollector{ 157 190 pds: holdPDS, 158 191 s3: s3svc, 159 192 cfg: cfg, ··· 161 194 stopCh: make(chan struct{}), 162 195 predecessorCache: make(map[string]bool), 163 196 } 197 + for _, opt := range opts { 198 + opt(gc) 199 + } 200 + return gc 201 + } 202 + 203 + // isManifestTakenDown reports whether the labeler cache (if any) currently 204 + // holds a takedown for this manifest URI, returning the takedown's cts so the 205 + // caller can decide whether the grace window has elapsed. 206 + func (gc *GarbageCollector) isManifestTakenDown(manifestURI string) (time.Time, bool) { 207 + if gc.takedownGate == nil { 208 + return time.Time{}, false 209 + } 210 + return gc.takedownGate.IsTakenDown(manifestURI) 211 + } 212 + 213 + // takedownExpired reports whether a takedown's cts is older than the 214 + // configured grace window. With a non-positive window every takedown is 215 + // considered expired immediately (i.e. blobs are never protected). 216 + func (gc *GarbageCollector) takedownExpired(cts time.Time) bool { 217 + if gc.takedownGrace <= 0 { 218 + return true 219 + } 220 + return time.Since(cts) > gc.takedownGrace 164 221 } 165 222 166 223 // tryStart attempts to mark GC as running. Returns false if already running. 
··· 636 693 fetchedUsers[did] = true 637 694 for _, m := range manifests { 638 695 result.manifestsChecked++ 696 + 697 + // Labeler-aware reachability: 698 + // - in-grace takedown: blobs stay referenced (so a reversal can 699 + // restore content), but the manifest is NOT added to 700 + // knownManifests so reconcileMissingRecords won't recreate the 701 + // layer records the labeler subscriber just purged. 702 + // - past-grace takedown: skip entirely — digests fall out of 703 + // the referenced set and become eligible for blob GC. 704 + if cts, taken := gc.isManifestTakenDown(m.URI); taken { 705 + if !gc.takedownExpired(cts) { 706 + for _, layer := range m.Record.Layers { 707 + result.referenced[layer.Digest] = true 708 + } 709 + if m.Record.Config != nil && m.Record.Config.Digest != "" { 710 + result.referenced[m.Record.Config.Digest] = true 711 + } 712 + gc.logger.Debug("Manifest under in-grace takedown: blobs protected, reconcile skipped", 713 + "manifest", m.URI, "cts", cts) 714 + } else { 715 + gc.logger.Debug("Manifest takedown past grace: orphaning blobs", 716 + "manifest", m.URI, "cts", cts) 717 + } 718 + continue 719 + } 720 + 639 721 knownManifests[m.URI] = m 640 722 641 723 // Add all layer digests to referenced set
+68
pkg/hold/gc/takedown_gate_test.go
··· 1 + package gc 2 + 3 + import ( 4 + "testing" 5 + "time" 6 + ) 7 + 8 + // stubGate satisfies TakedownGate for tests so we can exercise the GC's 9 + // reachability decisions without standing up a full labeler cache or PDS. 10 + type stubGate struct { 11 + uri string 12 + cts time.Time 13 + } 14 + 15 + func (s stubGate) IsTakenDown(uri string) (time.Time, bool) { 16 + if uri == s.uri { 17 + return s.cts, true 18 + } 19 + return time.Time{}, false 20 + } 21 + 22 + func TestIsManifestTakenDown(t *testing.T) { 23 + t.Run("nil gate", func(t *testing.T) { 24 + gc := &GarbageCollector{} 25 + if _, ok := gc.isManifestTakenDown("at://x"); ok { 26 + t.Fatalf("nil gate should report no takedowns") 27 + } 28 + }) 29 + t.Run("matching uri", func(t *testing.T) { 30 + cts := time.Now() 31 + gc := &GarbageCollector{takedownGate: stubGate{uri: "at://x", cts: cts}} 32 + got, ok := gc.isManifestTakenDown("at://x") 33 + if !ok { 34 + t.Fatalf("gate should report takedown for matching URI") 35 + } 36 + if !got.Equal(cts) { 37 + t.Fatalf("cts = %v, want %v", got, cts) 38 + } 39 + }) 40 + t.Run("non-matching uri", func(t *testing.T) { 41 + gc := &GarbageCollector{takedownGate: stubGate{uri: "at://x", cts: time.Now()}} 42 + if _, ok := gc.isManifestTakenDown("at://y"); ok { 43 + t.Fatalf("non-matching URI should not report takedown") 44 + } 45 + }) 46 + } 47 + 48 + func TestTakedownExpired(t *testing.T) { 49 + tests := []struct { 50 + name string 51 + cts time.Time 52 + grace time.Duration 53 + expired bool 54 + }{ 55 + {"in-window", time.Now().Add(-time.Hour), 24 * time.Hour, false}, 56 + {"past-window", time.Now().Add(-48 * time.Hour), 24 * time.Hour, true}, 57 + {"zero grace expires immediately", time.Now(), 0, true}, 58 + {"negative grace expires immediately", time.Now(), -time.Hour, true}, 59 + } 60 + for _, tt := range tests { 61 + t.Run(tt.name, func(t *testing.T) { 62 + gc := &GarbageCollector{takedownGrace: tt.grace} 63 + if got := gc.takedownExpired(tt.cts); got != 
tt.expired { 64 + t.Fatalf("takedownExpired = %v, want %v", got, tt.expired) 65 + } 66 + }) 67 + } 68 + }
+177
pkg/hold/labeler/cache.go
··· 1 + // Package labeler provides a labeler subscription client for the hold service. 2 + // 3 + // The hold subscribes to one labeler and mirrors active takedowns into a local 4 + // cache (in-memory + SQLite). On takedown receipt the hold purges layer, scan, 5 + // and image-config records for the affected manifest. The cache is consulted 6 + // by the GC to gate blob deletion: while a takedown is within its grace window, 7 + // the manifest's blob digests stay in the GC's referenced set so reversal can 8 + // still restore them. 9 + package labeler 10 + 11 + import ( 12 + "database/sql" 13 + "fmt" 14 + "strings" 15 + "sync" 16 + "time" 17 + ) 18 + 19 + // Cache holds active takedown URIs and their creation timestamps. It is 20 + // thread-safe and persists state to SQLite so the hold can answer takedown 21 + // queries before the labeler subscription has caught up after a restart. 22 + type Cache struct { 23 + mu sync.RWMutex 24 + // manifest URI → cts. Includes both per-record URIs (at://did/coll/rkey) 25 + // and per-DID URIs (at://did) for user-level takedowns. 26 + entries map[string]time.Time 27 + 28 + db *sql.DB 29 + } 30 + 31 + // NewCache opens (or creates) the takedown_cache and labeler_cursor tables on 32 + // the given DB and loads any existing entries into memory. 
33 + func NewCache(db *sql.DB) (*Cache, error) { 34 + c := &Cache{ 35 + entries: make(map[string]time.Time), 36 + db: db, 37 + } 38 + 39 + stmts := []string{ 40 + `CREATE TABLE IF NOT EXISTS takedown_cache ( 41 + uri TEXT PRIMARY KEY, 42 + src TEXT NOT NULL, 43 + cts TIMESTAMP NOT NULL 44 + )`, 45 + `CREATE INDEX IF NOT EXISTS idx_takedown_cache_cts ON takedown_cache(cts)`, 46 + `CREATE TABLE IF NOT EXISTS labeler_cursor ( 47 + labeler_did TEXT PRIMARY KEY, 48 + cursor INTEGER NOT NULL 49 + )`, 50 + } 51 + for _, s := range stmts { 52 + if _, err := db.Exec(s); err != nil { 53 + return nil, fmt.Errorf("init labeler schema: %w", err) 54 + } 55 + } 56 + 57 + rows, err := db.Query(`SELECT uri, cts FROM takedown_cache`) 58 + if err != nil { 59 + return nil, fmt.Errorf("load takedown cache: %w", err) 60 + } 61 + defer rows.Close() 62 + for rows.Next() { 63 + var uri string 64 + var cts time.Time 65 + if err := rows.Scan(&uri, &cts); err != nil { 66 + return nil, fmt.Errorf("scan takedown_cache row: %w", err) 67 + } 68 + c.entries[uri] = cts 69 + } 70 + return c, nil 71 + } 72 + 73 + // Set records a positive takedown for uri at cts. Idempotent: re-applying 74 + // updates the timestamp (newer takedowns win). 75 + func (c *Cache) Set(uri, src string, cts time.Time) error { 76 + c.mu.Lock() 77 + c.entries[uri] = cts 78 + c.mu.Unlock() 79 + 80 + _, err := c.db.Exec( 81 + `INSERT INTO takedown_cache (uri, src, cts) VALUES (?, ?, ?) 82 + ON CONFLICT(uri) DO UPDATE SET src = excluded.src, cts = excluded.cts`, 83 + uri, src, cts, 84 + ) 85 + if err != nil { 86 + return fmt.Errorf("persist takedown: %w", err) 87 + } 88 + return nil 89 + } 90 + 91 + // Negate removes a takedown entry. Idempotent. 
92 + func (c *Cache) Negate(uri string) error { 93 + c.mu.Lock() 94 + delete(c.entries, uri) 95 + c.mu.Unlock() 96 + 97 + _, err := c.db.Exec(`DELETE FROM takedown_cache WHERE uri = ?`, uri) 98 + if err != nil { 99 + return fmt.Errorf("delete takedown: %w", err) 100 + } 101 + return nil 102 + } 103 + 104 + // IsTakenDown reports whether a manifest URI is taken down, either directly 105 + // (per-manifest takedown) or via a user-level takedown on its DID. The returned 106 + // timestamp is the earliest cts that applies (i.e. the longest-standing 107 + // takedown), which is what the grace check should compare against. 108 + func (c *Cache) IsTakenDown(manifestURI string) (cts time.Time, ok bool) { 109 + c.mu.RLock() 110 + defer c.mu.RUnlock() 111 + 112 + if t, has := c.entries[manifestURI]; has { 113 + cts = t 114 + ok = true 115 + } 116 + 117 + // User-level: at://<did> with no path, applies to every record by that DID. 118 + did := didFromManifestURI(manifestURI) 119 + if did != "" { 120 + userURI := "at://" + did 121 + if t, has := c.entries[userURI]; has { 122 + if !ok || t.Before(cts) { 123 + cts = t 124 + ok = true 125 + } 126 + } 127 + } 128 + return cts, ok 129 + } 130 + 131 + // IsExpired returns true if cts is older than the grace window. 132 + func IsExpired(cts time.Time, graceWindow time.Duration) bool { 133 + if graceWindow <= 0 { 134 + return true 135 + } 136 + return time.Since(cts) > graceWindow 137 + } 138 + 139 + // GetCursor returns the last persisted cursor for a labeler DID (0 if none). 140 + func (c *Cache) GetCursor(labelerDID string) (int64, error) { 141 + var cursor int64 142 + err := c.db.QueryRow(`SELECT cursor FROM labeler_cursor WHERE labeler_did = ?`, labelerDID).Scan(&cursor) 143 + if err == sql.ErrNoRows { 144 + return 0, nil 145 + } 146 + if err != nil { 147 + return 0, fmt.Errorf("read labeler cursor: %w", err) 148 + } 149 + return cursor, nil 150 + } 151 + 152 + // SetCursor persists the cursor for a labeler DID. 
153 + func (c *Cache) SetCursor(labelerDID string, cursor int64) error { 154 + _, err := c.db.Exec( 155 + `INSERT INTO labeler_cursor (labeler_did, cursor) VALUES (?, ?) 156 + ON CONFLICT(labeler_did) DO UPDATE SET cursor = excluded.cursor`, 157 + labelerDID, cursor, 158 + ) 159 + if err != nil { 160 + return fmt.Errorf("persist labeler cursor: %w", err) 161 + } 162 + return nil 163 + } 164 + 165 + // didFromManifestURI extracts the authority (DID) from an at:// URI. 166 + // Returns "" for malformed input. 167 + func didFromManifestURI(uri string) string { 168 + const prefix = "at://" 169 + if !strings.HasPrefix(uri, prefix) { 170 + return "" 171 + } 172 + rest := uri[len(prefix):] 173 + if i := strings.IndexByte(rest, '/'); i >= 0 { 174 + return rest[:i] 175 + } 176 + return rest 177 + }
+191
pkg/hold/labeler/cache_test.go
··· 1 + package labeler 2 + 3 + import ( 4 + "database/sql" 5 + "testing" 6 + "time" 7 + 8 + _ "github.com/tursodatabase/go-libsql" 9 + ) 10 + 11 + func newTestCache(t *testing.T) *Cache { 12 + t.Helper() 13 + db, err := sql.Open("libsql", ":memory:") 14 + if err != nil { 15 + t.Fatalf("open in-memory db: %v", err) 16 + } 17 + t.Cleanup(func() { _ = db.Close() }) 18 + 19 + c, err := NewCache(db) 20 + if err != nil { 21 + t.Fatalf("NewCache: %v", err) 22 + } 23 + return c 24 + } 25 + 26 + func TestCacheSetAndIsTakenDown(t *testing.T) { 27 + c := newTestCache(t) 28 + uri := "at://did:plc:alice/io.atcr.manifest/abc" 29 + cts := time.Now().UTC().Add(-time.Hour) 30 + 31 + if _, ok := c.IsTakenDown(uri); ok { 32 + t.Fatalf("expected not taken down before Set") 33 + } 34 + if err := c.Set(uri, "did:web:labeler", cts); err != nil { 35 + t.Fatalf("Set: %v", err) 36 + } 37 + 38 + got, ok := c.IsTakenDown(uri) 39 + if !ok { 40 + t.Fatalf("expected taken down after Set") 41 + } 42 + if !got.Equal(cts) { 43 + t.Fatalf("cts = %v, want %v", got, cts) 44 + } 45 + } 46 + 47 + func TestCacheNegateRemovesEntry(t *testing.T) { 48 + c := newTestCache(t) 49 + uri := "at://did:plc:alice/io.atcr.manifest/abc" 50 + if err := c.Set(uri, "did:web:labeler", time.Now()); err != nil { 51 + t.Fatal(err) 52 + } 53 + if err := c.Negate(uri); err != nil { 54 + t.Fatalf("Negate: %v", err) 55 + } 56 + if _, ok := c.IsTakenDown(uri); ok { 57 + t.Fatalf("expected not taken down after Negate") 58 + } 59 + } 60 + 61 + func TestCacheUserLevelTakedownAppliesToAllManifests(t *testing.T) { 62 + c := newTestCache(t) 63 + userURI := "at://did:plc:alice" 64 + cts := time.Now().UTC() 65 + 66 + if err := c.Set(userURI, "did:web:labeler", cts); err != nil { 67 + t.Fatal(err) 68 + } 69 + 70 + manifestURI := "at://did:plc:alice/io.atcr.manifest/anything" 71 + got, ok := c.IsTakenDown(manifestURI) 72 + if !ok { 73 + t.Fatalf("user-level takedown should apply to manifest URI") 74 + } 75 + if !got.Equal(cts) { 76 + 
t.Fatalf("cts = %v, want %v", got, cts) 77 + } 78 + 79 + otherURI := "at://did:plc:bob/io.atcr.manifest/x" 80 + if _, ok := c.IsTakenDown(otherURI); ok { 81 + t.Fatalf("user-level takedown for alice must not affect bob") 82 + } 83 + } 84 + 85 + func TestCacheChoosesEarliestCtsAcrossSources(t *testing.T) { 86 + c := newTestCache(t) 87 + earlier := time.Now().UTC().Add(-2 * time.Hour) 88 + later := time.Now().UTC() 89 + 90 + manifestURI := "at://did:plc:alice/io.atcr.manifest/x" 91 + userURI := "at://did:plc:alice" 92 + 93 + // Per-manifest later, user-level earlier — IsTakenDown should report the earlier one. 94 + if err := c.Set(manifestURI, "did:web:labeler", later); err != nil { 95 + t.Fatal(err) 96 + } 97 + if err := c.Set(userURI, "did:web:labeler", earlier); err != nil { 98 + t.Fatal(err) 99 + } 100 + 101 + got, ok := c.IsTakenDown(manifestURI) 102 + if !ok { 103 + t.Fatalf("expected taken down") 104 + } 105 + if !got.Equal(earlier) { 106 + t.Fatalf("cts = %v, want earliest (%v)", got, earlier) 107 + } 108 + } 109 + 110 + func TestCachePersistsAcrossInstances(t *testing.T) { 111 + db, err := sql.Open("libsql", ":memory:") 112 + if err != nil { 113 + t.Fatalf("open: %v", err) 114 + } 115 + t.Cleanup(func() { _ = db.Close() }) 116 + 117 + first, err := NewCache(db) 118 + if err != nil { 119 + t.Fatalf("first NewCache: %v", err) 120 + } 121 + uri := "at://did:plc:alice/io.atcr.manifest/abc" 122 + cts := time.Now().UTC() 123 + if err := first.Set(uri, "did:web:labeler", cts); err != nil { 124 + t.Fatal(err) 125 + } 126 + 127 + // New cache instance, same DB — should warm-load entry. 
128 + second, err := NewCache(db) 129 + if err != nil { 130 + t.Fatalf("second NewCache: %v", err) 131 + } 132 + if _, ok := second.IsTakenDown(uri); !ok { 133 + t.Fatalf("second cache should see persisted takedown") 134 + } 135 + } 136 + 137 + func TestCacheCursorRoundTrip(t *testing.T) { 138 + c := newTestCache(t) 139 + const labeler = "did:web:labeler" 140 + 141 + got, err := c.GetCursor(labeler) 142 + if err != nil { 143 + t.Fatal(err) 144 + } 145 + if got != 0 { 146 + t.Fatalf("initial cursor = %d, want 0", got) 147 + } 148 + 149 + if err := c.SetCursor(labeler, 42); err != nil { 150 + t.Fatal(err) 151 + } 152 + got, err = c.GetCursor(labeler) 153 + if err != nil { 154 + t.Fatal(err) 155 + } 156 + if got != 42 { 157 + t.Fatalf("cursor = %d, want 42", got) 158 + } 159 + 160 + if err := c.SetCursor(labeler, 100); err != nil { 161 + t.Fatal(err) 162 + } 163 + got, err = c.GetCursor(labeler) 164 + if err != nil { 165 + t.Fatal(err) 166 + } 167 + if got != 100 { 168 + t.Fatalf("cursor after upsert = %d, want 100", got) 169 + } 170 + } 171 + 172 + func TestIsExpired(t *testing.T) { 173 + tests := []struct { 174 + name string 175 + cts time.Time 176 + grace time.Duration 177 + expired bool 178 + }{ 179 + {"in-window", time.Now().Add(-time.Hour), 24 * time.Hour, false}, 180 + {"past-window", time.Now().Add(-48 * time.Hour), 24 * time.Hour, true}, 181 + {"zero grace expires immediately", time.Now(), 0, true}, 182 + {"negative grace expires immediately", time.Now(), -time.Hour, true}, 183 + } 184 + for _, tt := range tests { 185 + t.Run(tt.name, func(t *testing.T) { 186 + if got := IsExpired(tt.cts, tt.grace); got != tt.expired { 187 + t.Fatalf("IsExpired = %v, want %v", got, tt.expired) 188 + } 189 + }) 190 + } 191 + }
+374
pkg/hold/labeler/subscriber.go
··· 1 + package labeler 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "errors" 7 + "fmt" 8 + "log/slog" 9 + "net/url" 10 + "strings" 11 + "time" 12 + 13 + comatproto "github.com/bluesky-social/indigo/api/atproto" 14 + "github.com/bluesky-social/indigo/events" 15 + "github.com/gorilla/websocket" 16 + ) 17 + 18 + // TakedownLabelValue is the label value the hold treats as a takedown trigger. 19 + // Mirrors what pkg/labeler/takedown.go emits. 20 + const TakedownLabelValue = "!takedown" 21 + 22 + // Purger is the subset of HoldPDS the subscriber needs to act on takedowns. 23 + // Defined as an interface so tests can substitute a stub without standing up 24 + // a full PDS, and to avoid an import cycle with pkg/hold/pds. 25 + type Purger interface { 26 + PurgeManifestRecords(ctx context.Context, manifestURI string) (PurgeOutcome, error) 27 + PurgeUserManifests(ctx context.Context, userDID string) (PurgeOutcome, error) 28 + } 29 + 30 + // PurgeOutcome mirrors pds.PurgeResult without creating an import cycle. 31 + type PurgeOutcome struct { 32 + LayersDeleted int 33 + ScanDeleted bool 34 + ImageConfigDeleted bool 35 + } 36 + 37 + // Subscriber connects to a labeler's subscribeLabels endpoint, mirrors 38 + // takedowns into the local cache, and triggers record purges on the hold. 39 + type Subscriber struct { 40 + labelerURL string 41 + labelerDID string 42 + cache *Cache 43 + purger Purger 44 + stopCh chan struct{} 45 + } 46 + 47 + // NewSubscriber builds a subscriber for the given labeler. labelerDIDOrURL 48 + // may be either: 49 + // 50 + // - a did:web identifier (e.g. did:web:labeler.atcr.io) → resolved to https://labeler.atcr.io 51 + // - a raw http/https URL (e.g. 
http://172.28.0.4:5002 for dev) 52 + // 53 + // The websocket URL is derived from the resolved HTTPS endpoint; the 54 + // labeler's DID (used to filter the Src field on incoming labels) is derived 55 + // the same way the appview's labeler subscriber derives it, so a single 56 + // config field suffices. 57 + func NewSubscriber(labelerDIDOrURL string, cache *Cache, purger Purger) *Subscriber { 58 + httpURL := parseLabelerURL(labelerDIDOrURL) 59 + return &Subscriber{ 60 + labelerURL: httpURL, 61 + labelerDID: deriveLabelerDID(labelerDIDOrURL, httpURL), 62 + cache: cache, 63 + purger: purger, 64 + stopCh: make(chan struct{}), 65 + } 66 + } 67 + 68 + // LabelerDID returns the DID derived from the labeler URL. Useful for the 69 + // caller to log the trusted source. 70 + func (s *Subscriber) LabelerDID() string { return s.labelerDID } 71 + 72 + // Start runs the subscription loop in a goroutine. 73 + func (s *Subscriber) Start() { 74 + go s.run() 75 + } 76 + 77 + // Stop signals the subscriber to shut down. Safe to call once. 
78 + func (s *Subscriber) Stop() { 79 + close(s.stopCh) 80 + } 81 + 82 + func (s *Subscriber) run() { 83 + backoff := time.Second 84 + for { 85 + select { 86 + case <-s.stopCh: 87 + return 88 + default: 89 + } 90 + 91 + if err := s.connect(); err != nil { 92 + slog.Warn("Hold labeler subscription error, reconnecting", 93 + "labeler", s.labelerURL, 94 + "error", err, 95 + "backoff", backoff, 96 + ) 97 + select { 98 + case <-s.stopCh: 99 + return 100 + case <-time.After(backoff): 101 + } 102 + if backoff < 30*time.Second { 103 + backoff *= 2 104 + } 105 + } else { 106 + backoff = time.Second 107 + } 108 + } 109 + } 110 + 111 + func (s *Subscriber) connect() error { 112 + cursor, err := s.cache.GetCursor(s.labelerDID) 113 + if err != nil { 114 + return fmt.Errorf("get cursor: %w", err) 115 + } 116 + 117 + wsURL := toWebSocketURL(s.labelerURL) + "/xrpc/com.atproto.label.subscribeLabels" 118 + if cursor > 0 { 119 + wsURL += fmt.Sprintf("?cursor=%d", cursor) 120 + } 121 + 122 + slog.Info("Hold connecting to labeler", "url", wsURL, "cursor", cursor) 123 + conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil) 124 + if err != nil { 125 + return fmt.Errorf("websocket dial: %w", err) 126 + } 127 + defer conn.Close() 128 + slog.Info("Hold connected to labeler", "url", s.labelerURL) 129 + 130 + for { 131 + select { 132 + case <-s.stopCh: 133 + return nil 134 + default: 135 + } 136 + 137 + mt, payload, err := conn.ReadMessage() 138 + if err != nil { 139 + return fmt.Errorf("read: %w", err) 140 + } 141 + if mt != websocket.BinaryMessage { 142 + slog.Warn("Hold labeler: ignoring non-binary frame", "type", mt) 143 + continue 144 + } 145 + 146 + seq, labels, err := decodeFrame(payload) 147 + if err != nil { 148 + if errors.Is(err, errInfoFrame) { 149 + continue 150 + } 151 + return fmt.Errorf("decode frame: %w", err) 152 + } 153 + 154 + for _, lbl := range labels { 155 + s.applyLabel(seq, lbl) 156 + } 157 + 158 + if err := s.cache.SetCursor(s.labelerDID, seq); err != nil { 159 + 
slog.Warn("Hold labeler: failed to persist cursor", "seq", seq, "error", err) 160 + } 161 + } 162 + } 163 + 164 + // applyLabel processes one label. Only takedown labels from a trusted source 165 + // trigger cache mutations and record purges; everything else is ignored. 166 + func (s *Subscriber) applyLabel(seq int64, lbl *comatproto.LabelDefs_Label) { 167 + if lbl == nil { 168 + return 169 + } 170 + if lbl.Val != TakedownLabelValue { 171 + return 172 + } 173 + if !s.trustsSource(lbl.Src) { 174 + slog.Debug("Hold labeler: ignoring untrusted source", 175 + "src", lbl.Src, "uri", lbl.Uri, "seq", seq) 176 + return 177 + } 178 + 179 + cts, _ := time.Parse(time.RFC3339, lbl.Cts) 180 + negated := lbl.Neg != nil && *lbl.Neg 181 + 182 + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) 183 + defer cancel() 184 + 185 + if negated { 186 + if err := s.cache.Negate(lbl.Uri); err != nil { 187 + slog.Warn("Hold labeler: failed to drop takedown from cache", 188 + "uri", lbl.Uri, "error", err) 189 + return 190 + } 191 + slog.Info("Hold labeler: takedown reversed", "uri", lbl.Uri, "seq", seq) 192 + return 193 + } 194 + 195 + if err := s.cache.Set(lbl.Uri, lbl.Src, cts); err != nil { 196 + slog.Warn("Hold labeler: failed to record takedown", 197 + "uri", lbl.Uri, "error", err) 198 + return 199 + } 200 + 201 + // User-level vs per-record. The labeler emits per-record labels for every 202 + // individual manifest in a repo-level takedown plus a summary; for a 203 + // user-level takedown only at://<did> is emitted. We dispatch on shape so 204 + // we don't try to PurgeManifestRecords on a non-manifest URI. 
205 + switch shape := classifyURI(lbl.Uri); shape.Kind { 206 + case uriKindManifest: 207 + out, err := s.purger.PurgeManifestRecords(ctx, lbl.Uri) 208 + if err != nil { 209 + slog.Warn("Hold labeler: purge failed", "uri", lbl.Uri, "error", err) 210 + return 211 + } 212 + slog.Info("Hold labeler: purged manifest on takedown", 213 + "uri", lbl.Uri, "layers", out.LayersDeleted, 214 + "scan", out.ScanDeleted, "config", out.ImageConfigDeleted) 215 + case uriKindUser: 216 + out, err := s.purger.PurgeUserManifests(ctx, shape.DID) 217 + if err != nil { 218 + slog.Warn("Hold labeler: user-level purge failed", 219 + "did", shape.DID, "error", err) 220 + return 221 + } 222 + slog.Info("Hold labeler: purged all manifests for user on takedown", 223 + "did", shape.DID, "layers", out.LayersDeleted) 224 + default: 225 + // Repo-level summary URI (at://did/io.atcr.repo/<repo>) and other 226 + // non-record subjects: cached for GC's reachability check, but no 227 + // records to purge directly. The per-manifest labels in the same 228 + // stream do the actual record removal. 229 + slog.Debug("Hold labeler: cached non-record takedown", 230 + "uri", lbl.Uri, "kind", string(shape.Kind)) 231 + } 232 + } 233 + 234 + // uriKind classifies the shape of a label subject URI. 235 + type uriKind string 236 + 237 + const ( 238 + uriKindManifest uriKind = "manifest" 239 + uriKindUser uriKind = "user" 240 + uriKindOther uriKind = "other" 241 + ) 242 + 243 + type uriShape struct { 244 + Kind uriKind 245 + DID string 246 + } 247 + 248 + // classifyURI inspects an at:// URI and reports whether it points at a single 249 + // manifest record, an entire user, or something else (repo summary etc.). 
250 + func classifyURI(uri string) uriShape { 251 + const prefix = "at://" 252 + if !strings.HasPrefix(uri, prefix) { 253 + return uriShape{Kind: uriKindOther} 254 + } 255 + rest := uri[len(prefix):] 256 + parts := strings.SplitN(rest, "/", 3) 257 + if len(parts) == 0 || parts[0] == "" { 258 + return uriShape{Kind: uriKindOther} 259 + } 260 + did := parts[0] 261 + if len(parts) == 1 { 262 + return uriShape{Kind: uriKindUser, DID: did} 263 + } 264 + if len(parts) == 3 && parts[1] == "io.atcr.manifest" && parts[2] != "" { 265 + return uriShape{Kind: uriKindManifest, DID: did} 266 + } 267 + return uriShape{Kind: uriKindOther, DID: did} 268 + } 269 + 270 + // errInfoFrame signals that the frame was an info frame and the caller should 271 + // continue without treating it as a label or an error. 272 + var errInfoFrame = errors.New("hold labeler: info frame") 273 + 274 + // decodeFrame parses a single subscribeLabels binary frame. 275 + // 276 + // ATProto event-stream framing is two concatenated CBOR objects: a {op,t} 277 + // header and a body. We dispatch on op/t; for op=1, t="#labels" we return the 278 + // labels body. #info frames are logged and signal errInfoFrame so the caller 279 + // loops; error frames become Go errors so the run loop reconnects. 
280 + func decodeFrame(payload []byte) (int64, []*comatproto.LabelDefs_Label, error) { 281 + r := bytes.NewReader(payload) 282 + var header events.EventHeader 283 + if err := header.UnmarshalCBOR(r); err != nil { 284 + return 0, nil, fmt.Errorf("unmarshal header: %w", err) 285 + } 286 + 287 + switch { 288 + case header.Op == events.EvtKindErrorFrame: 289 + var ef events.ErrorFrame 290 + if err := ef.UnmarshalCBOR(r); err != nil { 291 + return 0, nil, fmt.Errorf("unmarshal error frame: %w", err) 292 + } 293 + return 0, nil, fmt.Errorf("labeler error frame: %s — %s", ef.Error, ef.Message) 294 + case header.Op == events.EvtKindMessage && header.MsgType == "#labels": 295 + var body comatproto.LabelSubscribeLabels_Labels 296 + if err := body.UnmarshalCBOR(r); err != nil { 297 + return 0, nil, fmt.Errorf("unmarshal labels body: %w", err) 298 + } 299 + return body.Seq, body.Labels, nil 300 + case header.Op == events.EvtKindMessage && header.MsgType == "#info": 301 + var info comatproto.LabelSubscribeLabels_Info 302 + if err := info.UnmarshalCBOR(r); err != nil { 303 + return 0, nil, fmt.Errorf("unmarshal info body: %w", err) 304 + } 305 + message := "" 306 + if info.Message != nil { 307 + message = *info.Message 308 + } 309 + slog.Info("Hold labeler: info frame", "name", info.Name, "message", message) 310 + return 0, nil, errInfoFrame 311 + default: 312 + return 0, nil, fmt.Errorf("unexpected frame op=%d t=%q", header.Op, header.MsgType) 313 + } 314 + } 315 + 316 + // trustsSource reports whether labels with the given Src DID should be 317 + // honored. Today this is "matches the configured labeler DID" — a single 318 + // trusted source. If we ever want a list, this becomes a set membership 319 + // check without touching callers. 320 + func (s *Subscriber) trustsSource(src string) bool { 321 + return src == s.labelerDID 322 + } 323 + 324 + // parseLabelerURL accepts either a did:web:... 
identifier or a raw http/https 325 + // URL and returns the HTTPS (or HTTP for did:web pointing at a hostname with 326 + // %3A-encoded port in test mode) endpoint to talk to. did:web hosts with 327 + // %3A-encoded ports are decoded back to colons. Mirrors the appview's 328 + // ParseLabelerURL so a single config field works in both places. 329 + func parseLabelerURL(labelerDIDOrURL string) string { 330 + if strings.HasPrefix(labelerDIDOrURL, "http://") || strings.HasPrefix(labelerDIDOrURL, "https://") { 331 + return labelerDIDOrURL 332 + } 333 + if strings.HasPrefix(labelerDIDOrURL, "did:web:") { 334 + host := strings.TrimPrefix(labelerDIDOrURL, "did:web:") 335 + host = strings.ReplaceAll(host, "%3A", ":") 336 + return "https://" + host 337 + } 338 + return labelerDIDOrURL 339 + } 340 + 341 + // deriveLabelerDID returns the canonical labeler DID for source filtering. 342 + // When the operator gave us a did:web identifier directly, we use it as-is. 343 + // When they gave us a URL, we derive a did:web from its host (so dev URLs 344 + // like http://172.28.0.4:5002 yield did:web:172.28.0.4%3A5002, matching the 345 + // labeler's own self-served identity). 346 + func deriveLabelerDID(labelerDIDOrURL, httpURL string) string { 347 + if strings.HasPrefix(labelerDIDOrURL, "did:") { 348 + return labelerDIDOrURL 349 + } 350 + u, err := url.Parse(httpURL) 351 + if err != nil { 352 + return labelerDIDOrURL 353 + } 354 + host := u.Hostname() 355 + if port := u.Port(); port != "" { 356 + host += "%3A" + port 357 + } 358 + return "did:web:" + host 359 + } 360 + 361 + // toWebSocketURL converts an HTTP URL to a WebSocket URL. http→ws, https→wss. 362 + func toWebSocketURL(httpURL string) string { 363 + u, err := url.Parse(httpURL) 364 + if err != nil { 365 + return httpURL 366 + } 367 + switch u.Scheme { 368 + case "https": 369 + u.Scheme = "wss" 370 + default: 371 + u.Scheme = "ws" 372 + } 373 + return u.String() 374 + }
+210
pkg/hold/labeler/subscriber_test.go
··· 1 + package labeler 2 + 3 + import ( 4 + "context" 5 + "sync" 6 + "testing" 7 + "time" 8 + 9 + comatproto "github.com/bluesky-social/indigo/api/atproto" 10 + ) 11 + 12 + // stubPurger captures purge calls so we can assert what the subscriber routed. 13 + type stubPurger struct { 14 + mu sync.Mutex 15 + manifestCalls []string 16 + userLevelCalls []string 17 + purgeManifestErr error 18 + purgeUserError error 19 + manifestOutcome PurgeOutcome 20 + userLevelOutcome PurgeOutcome 21 + } 22 + 23 + func (s *stubPurger) PurgeManifestRecords(_ context.Context, uri string) (PurgeOutcome, error) { 24 + s.mu.Lock() 25 + s.manifestCalls = append(s.manifestCalls, uri) 26 + s.mu.Unlock() 27 + return s.manifestOutcome, s.purgeManifestErr 28 + } 29 + 30 + func (s *stubPurger) PurgeUserManifests(_ context.Context, did string) (PurgeOutcome, error) { 31 + s.mu.Lock() 32 + s.userLevelCalls = append(s.userLevelCalls, did) 33 + s.mu.Unlock() 34 + return s.userLevelOutcome, s.purgeUserError 35 + } 36 + 37 + func (s *stubPurger) snapshot() (manifests, users []string) { 38 + s.mu.Lock() 39 + defer s.mu.Unlock() 40 + return append([]string(nil), s.manifestCalls...), append([]string(nil), s.userLevelCalls...) 41 + } 42 + 43 + func ptrBool(b bool) *bool { return &b } 44 + 45 + func TestApplyLabelManifestTakedownPurges(t *testing.T) { 46 + cache := newTestCache(t) 47 + purger := &stubPurger{} 48 + // Use a did:web identifier directly so we control exactly what Src must 49 + // match — derived URL is https://labeler.example.com. 
50 + sub := NewSubscriber("did:web:labeler.example.com", cache, purger) 51 + 52 + uri := "at://did:plc:alice/io.atcr.manifest/abc" 53 + cts := time.Now().UTC().Format(time.RFC3339) 54 + 55 + sub.applyLabel(1, &comatproto.LabelDefs_Label{ 56 + Src: "did:web:labeler.example.com", 57 + Uri: uri, 58 + Val: TakedownLabelValue, 59 + Cts: cts, 60 + }) 61 + 62 + manifests, users := purger.snapshot() 63 + if len(manifests) != 1 || manifests[0] != uri { 64 + t.Fatalf("manifest purges = %v, want [%q]", manifests, uri) 65 + } 66 + if len(users) != 0 { 67 + t.Fatalf("expected no user-level purges, got %v", users) 68 + } 69 + if _, ok := cache.IsTakenDown(uri); !ok { 70 + t.Fatalf("cache should record the takedown") 71 + } 72 + } 73 + 74 + func TestApplyLabelUserLevelTakedownPurgesAllManifests(t *testing.T) { 75 + cache := newTestCache(t) 76 + purger := &stubPurger{} 77 + sub := NewSubscriber("did:web:labeler.example.com", cache, purger) 78 + 79 + uri := "at://did:plc:alice" 80 + sub.applyLabel(1, &comatproto.LabelDefs_Label{ 81 + Src: "did:web:labeler.example.com", 82 + Uri: uri, 83 + Val: TakedownLabelValue, 84 + Cts: time.Now().UTC().Format(time.RFC3339), 85 + }) 86 + 87 + manifests, users := purger.snapshot() 88 + if len(users) != 1 || users[0] != "did:plc:alice" { 89 + t.Fatalf("user-level purges = %v, want [did:plc:alice]", users) 90 + } 91 + if len(manifests) != 0 { 92 + t.Fatalf("expected no per-manifest purges, got %v", manifests) 93 + } 94 + if _, ok := cache.IsTakenDown("at://did:plc:alice/io.atcr.manifest/anything"); !ok { 95 + t.Fatalf("user-level entry should mask any manifest URI for that DID") 96 + } 97 + } 98 + 99 + func TestApplyLabelNegationDropsCacheNoPurge(t *testing.T) { 100 + cache := newTestCache(t) 101 + uri := "at://did:plc:alice/io.atcr.manifest/abc" 102 + if err := cache.Set(uri, "did:web:labeler.example.com", time.Now()); err != nil { 103 + t.Fatal(err) 104 + } 105 + purger := &stubPurger{} 106 + sub := NewSubscriber("did:web:labeler.example.com", 
cache, purger) 107 + 108 + sub.applyLabel(2, &comatproto.LabelDefs_Label{ 109 + Src: "did:web:labeler.example.com", 110 + Uri: uri, 111 + Val: TakedownLabelValue, 112 + Neg: ptrBool(true), 113 + Cts: time.Now().UTC().Format(time.RFC3339), 114 + }) 115 + 116 + if _, ok := cache.IsTakenDown(uri); ok { 117 + t.Fatalf("negation should drop the takedown from cache") 118 + } 119 + manifests, _ := purger.snapshot() 120 + if len(manifests) != 0 { 121 + t.Fatalf("negation must not trigger purge, got %v", manifests) 122 + } 123 + } 124 + 125 + func TestApplyLabelIgnoresUntrustedSource(t *testing.T) { 126 + cache := newTestCache(t) 127 + purger := &stubPurger{} 128 + // Subscriber is configured for did:web:operator; a label whose Src is a 129 + // different DID must be ignored (today's single-DID trust model). 130 + sub := NewSubscriber("did:web:operator", cache, purger) 131 + 132 + sub.applyLabel(1, &comatproto.LabelDefs_Label{ 133 + Src: "did:web:rogue", 134 + Uri: "at://did:plc:alice/io.atcr.manifest/abc", 135 + Val: TakedownLabelValue, 136 + Cts: time.Now().UTC().Format(time.RFC3339), 137 + }) 138 + 139 + manifests, users := purger.snapshot() 140 + if len(manifests) != 0 || len(users) != 0 { 141 + t.Fatalf("untrusted source must not trigger purge: manifests=%v users=%v", manifests, users) 142 + } 143 + } 144 + 145 + func TestSubscriberDerivesDIDFromURL(t *testing.T) { 146 + tests := []struct { 147 + input string 148 + wantURL string 149 + wantDID string 150 + }{ 151 + {"did:web:labeler.atcr.io", "https://labeler.atcr.io", "did:web:labeler.atcr.io"}, 152 + {"did:web:172.28.0.4%3A5002", "https://172.28.0.4:5002", "did:web:172.28.0.4%3A5002"}, 153 + {"http://172.28.0.4:5002", "http://172.28.0.4:5002", "did:web:172.28.0.4%3A5002"}, 154 + {"https://labeler.atcr.io", "https://labeler.atcr.io", "did:web:labeler.atcr.io"}, 155 + } 156 + for _, tt := range tests { 157 + t.Run(tt.input, func(t *testing.T) { 158 + sub := NewSubscriber(tt.input, nil, nil) 159 + if sub.labelerURL != 
tt.wantURL { 160 + t.Errorf("labelerURL = %q, want %q", sub.labelerURL, tt.wantURL) 161 + } 162 + if sub.labelerDID != tt.wantDID { 163 + t.Errorf("labelerDID = %q, want %q", sub.labelerDID, tt.wantDID) 164 + } 165 + }) 166 + } 167 + } 168 + 169 + func TestApplyLabelIgnoresNonTakedownValues(t *testing.T) { 170 + cache := newTestCache(t) 171 + purger := &stubPurger{} 172 + sub := NewSubscriber("did:web:labeler.example.com", cache, purger) 173 + 174 + sub.applyLabel(1, &comatproto.LabelDefs_Label{ 175 + Src: "did:web:labeler.example.com", 176 + Uri: "at://did:plc:alice/io.atcr.manifest/abc", 177 + Val: "spam", 178 + Cts: time.Now().UTC().Format(time.RFC3339), 179 + }) 180 + 181 + manifests, users := purger.snapshot() 182 + if len(manifests) != 0 || len(users) != 0 { 183 + t.Fatalf("non-takedown labels must not trigger purge: manifests=%v users=%v", manifests, users) 184 + } 185 + } 186 + 187 + func TestClassifyURI(t *testing.T) { 188 + tests := []struct { 189 + uri string 190 + kind uriKind 191 + did string 192 + }{ 193 + {"at://did:plc:alice/io.atcr.manifest/abc", uriKindManifest, "did:plc:alice"}, 194 + {"at://did:plc:alice", uriKindUser, "did:plc:alice"}, 195 + {"at://did:plc:alice/io.atcr.repo/myimage", uriKindOther, "did:plc:alice"}, 196 + {"https://example.com", uriKindOther, ""}, 197 + {"", uriKindOther, ""}, 198 + } 199 + for _, tt := range tests { 200 + t.Run(tt.uri, func(t *testing.T) { 201 + got := classifyURI(tt.uri) 202 + if got.Kind != tt.kind { 203 + t.Fatalf("kind = %s, want %s", got.Kind, tt.kind) 204 + } 205 + if got.DID != tt.did { 206 + t.Fatalf("did = %s, want %s", got.DID, tt.did) 207 + } 208 + }) 209 + } 210 + }
+254
pkg/hold/pds/purge.go
··· 1 + package pds 2 + 3 + import ( 4 + "context" 5 + "fmt" 6 + "log/slog" 7 + "strings" 8 + 9 + "atcr.io/pkg/atproto" 10 + lexutil "github.com/bluesky-social/indigo/lex/util" 11 + "github.com/bluesky-social/indigo/repo" 12 + "github.com/ipfs/go-cid" 13 + ) 14 + 15 + // PurgeResult summarizes a PurgeManifestRecords call. 16 + type PurgeResult struct { 17 + LayersDeleted int 18 + ScanDeleted bool 19 + ImageConfigDeleted bool 20 + } 21 + 22 + // PurgeManifestRecords removes layer, scan, and image-config records associated 23 + // with a single manifest AT-URI. The manifest record itself lives in the user's 24 + // PDS and is not touched. S3 blobs are not removed; the GC handles them based 25 + // on remaining references and the labeler grace window. 26 + // 27 + // Idempotent: missing records are not errors. Used by both the 28 + // io.atcr.hold.purgeManifest XRPC handler (called by the appview on UI delete) 29 + // and the hold's labeler subscriber (called on takedown receipt). 30 + func (p *HoldPDS) PurgeManifestRecords(ctx context.Context, manifestURI string) (*PurgeResult, error) { 31 + if manifestURI == "" { 32 + return nil, fmt.Errorf("manifest URI is required") 33 + } 34 + 35 + res := &PurgeResult{} 36 + 37 + // Layer records: TID rkeys, multiple per manifest. Look up the rkeys by 38 + // scanning the layer index and matching the manifest field. 39 + rkeys, err := p.listLayerRkeysForManifest(ctx, manifestURI) 40 + if err != nil { 41 + return res, fmt.Errorf("list layer rkeys: %w", err) 42 + } 43 + for _, rkey := range rkeys { 44 + if err := p.DeleteLayerRecord(ctx, rkey); err != nil { 45 + slog.Warn("Failed to delete layer record during purge", 46 + "rkey", rkey, "manifest", manifestURI, "error", err) 47 + continue 48 + } 49 + res.LayersDeleted++ 50 + } 51 + 52 + // Scan + image-config records share a deterministic rkey scheme based on 53 + // the manifest digest, so we can address them directly. 
54 + manifestDigest, err := atproto.ParseManifestURI(manifestURI) 55 + if err != nil { 56 + // Without a parseable digest we can't compute the deterministic rkey. 57 + // Layer records may still have been deleted above; return what we did. 58 + slog.Warn("Cannot parse manifest URI for scan/config purge", 59 + "manifest", manifestURI, "error", err) 60 + return res, nil 61 + } 62 + rkey := atproto.ScanRecordKey(manifestDigest) 63 + res.ScanDeleted = p.tryDeleteRecord(ctx, atproto.ScanCollection, rkey) 64 + res.ImageConfigDeleted = p.tryDeleteRecord(ctx, atproto.ImageConfigCollection, rkey) 65 + 66 + slog.Info("Purged manifest records", 67 + "manifest", manifestURI, 68 + "layers", res.LayersDeleted, 69 + "scan", res.ScanDeleted, 70 + "imageConfig", res.ImageConfigDeleted, 71 + ) 72 + return res, nil 73 + } 74 + 75 + // PurgeUserManifests purges every manifest's records for a given DID. Used by 76 + // user-level takedowns (URI = at://<did>) where the labeler has not enumerated 77 + // individual manifest URIs. 78 + // 79 + // Discovers the set of manifest URIs from the layer index (every layer record 80 + // names its manifest) so we can reuse PurgeManifestRecords for each. 
81 + func (p *HoldPDS) PurgeUserManifests(ctx context.Context, userDID string) (*PurgeResult, error) { 82 + if userDID == "" { 83 + return nil, fmt.Errorf("user DID is required") 84 + } 85 + if p.recordsIndex == nil { 86 + return nil, fmt.Errorf("records index not available") 87 + } 88 + 89 + manifestURIs, err := p.collectUserManifestURIs(ctx, userDID) 90 + if err != nil { 91 + return nil, fmt.Errorf("collect manifest URIs: %w", err) 92 + } 93 + 94 + combined := &PurgeResult{} 95 + for uri := range manifestURIs { 96 + r, err := p.PurgeManifestRecords(ctx, uri) 97 + if err != nil { 98 + slog.Warn("Failed to purge manifest in user-level purge", 99 + "manifest", uri, "user", userDID, "error", err) 100 + continue 101 + } 102 + combined.LayersDeleted += r.LayersDeleted 103 + if r.ScanDeleted { 104 + combined.ScanDeleted = true 105 + } 106 + if r.ImageConfigDeleted { 107 + combined.ImageConfigDeleted = true 108 + } 109 + } 110 + 111 + slog.Info("Purged all manifests for user", 112 + "user", userDID, 113 + "manifestCount", len(manifestURIs), 114 + "layersDeleted", combined.LayersDeleted, 115 + ) 116 + return combined, nil 117 + } 118 + 119 + // listLayerRkeysForManifest scans the layer record index, decodes each record 120 + // from the carstore, and returns the rkeys whose `manifest` field matches. 121 + // 122 + // Mirrors ListLayerRecordsForManifest but returns rkeys (which the caller 123 + // needs for deletion) instead of decoded record values. 
124 + func (p *HoldPDS) listLayerRkeysForManifest(ctx context.Context, manifestURI string) ([]string, error) { 125 + if p.recordsIndex == nil { 126 + return nil, fmt.Errorf("records index not available") 127 + } 128 + 129 + session, err := p.carstore.ReadOnlySession(p.uid) 130 + if err != nil { 131 + return nil, fmt.Errorf("create session: %w", err) 132 + } 133 + head, err := p.carstore.GetUserRepoHead(ctx, p.uid) 134 + if err != nil { 135 + return nil, fmt.Errorf("get repo head: %w", err) 136 + } 137 + if !head.Defined() { 138 + return nil, nil 139 + } 140 + repoHandle, err := repo.OpenRepo(ctx, session, head) 141 + if err != nil { 142 + return nil, fmt.Errorf("open repo: %w", err) 143 + } 144 + 145 + var rkeys []string 146 + cursor := "" 147 + const batch = 1000 148 + for { 149 + indexRecords, nextCursor, err := p.recordsIndex.ListRecords(atproto.LayerCollection, batch, cursor, false) 150 + if err != nil { 151 + return nil, fmt.Errorf("list layer records: %w", err) 152 + } 153 + for _, rec := range indexRecords { 154 + path := rec.Collection + "/" + rec.Rkey 155 + _, recBytes, err := repoHandle.GetRecordBytes(ctx, path) 156 + if err != nil { 157 + continue 158 + } 159 + val, err := lexutil.CborDecodeValue(*recBytes) 160 + if err != nil { 161 + continue 162 + } 163 + layer, ok := val.(*atproto.LayerRecord) 164 + if !ok { 165 + continue 166 + } 167 + if layer.Manifest == manifestURI { 168 + rkeys = append(rkeys, rec.Rkey) 169 + } 170 + } 171 + if nextCursor == "" { 172 + break 173 + } 174 + cursor = nextCursor 175 + } 176 + return rkeys, nil 177 + } 178 + 179 + // collectUserManifestURIs walks the user's layer records and returns the set 180 + // of unique manifest AT-URIs they reference. 
181 + func (p *HoldPDS) collectUserManifestURIs(ctx context.Context, userDID string) (map[string]struct{}, error) { 182 + uris := make(map[string]struct{}) 183 + 184 + session, err := p.carstore.ReadOnlySession(p.uid) 185 + if err != nil { 186 + return nil, fmt.Errorf("create session: %w", err) 187 + } 188 + head, err := p.carstore.GetUserRepoHead(ctx, p.uid) 189 + if err != nil { 190 + return nil, fmt.Errorf("get repo head: %w", err) 191 + } 192 + if !head.Defined() { 193 + return uris, nil 194 + } 195 + repoHandle, err := repo.OpenRepo(ctx, session, head) 196 + if err != nil { 197 + return nil, fmt.Errorf("open repo: %w", err) 198 + } 199 + 200 + cursor := "" 201 + const batch = 200 202 + for { 203 + records, nextCursor, err := p.recordsIndex.ListRecordsByDID(atproto.LayerCollection, userDID, batch, cursor) 204 + if err != nil { 205 + return nil, fmt.Errorf("list layer records by DID: %w", err) 206 + } 207 + for _, rec := range records { 208 + path := rec.Collection + "/" + rec.Rkey 209 + _, recBytes, err := repoHandle.GetRecordBytes(ctx, path) 210 + if err != nil { 211 + continue 212 + } 213 + val, err := lexutil.CborDecodeValue(*recBytes) 214 + if err != nil { 215 + continue 216 + } 217 + layer, ok := val.(*atproto.LayerRecord) 218 + if !ok { 219 + continue 220 + } 221 + if layer.Manifest != "" && strings.HasPrefix(layer.Manifest, "at://"+userDID+"/") { 222 + uris[layer.Manifest] = struct{}{} 223 + } 224 + } 225 + if nextCursor == "" { 226 + break 227 + } 228 + cursor = nextCursor 229 + } 230 + return uris, nil 231 + } 232 + 233 + // tryDeleteRecord deletes a record at the given collection/rkey from both the 234 + // repo and the records index. Returns true if a record was actually deleted, 235 + // false if it didn't exist or the delete failed (failures are logged). 236 + func (p *HoldPDS) tryDeleteRecord(ctx context.Context, collection, rkey string) bool { 237 + // Probe via GetRecord — if the record doesn't exist we skip silently. 
238 + if _, _, err := p.repomgr.GetRecord(ctx, p.uid, collection, rkey, cid.Undef); err != nil { 239 + return false 240 + } 241 + 242 + if err := p.repomgr.DeleteRecord(ctx, p.uid, collection, rkey); err != nil { 243 + slog.Warn("Failed to delete record from repo", 244 + "collection", collection, "rkey", rkey, "error", err) 245 + return false 246 + } 247 + if p.recordsIndex != nil { 248 + if err := p.recordsIndex.DeleteRecord(collection, rkey); err != nil { 249 + slog.Warn("Failed to delete record from index", 250 + "collection", collection, "rkey", rkey, "error", err) 251 + } 252 + } 253 + return true 254 + }
+150
pkg/hold/pds/purge_test.go
··· 1 + package pds 2 + 3 + import ( 4 + "strings" 5 + "testing" 6 + 7 + "atcr.io/pkg/atproto" 8 + ) 9 + 10 + func TestPurgeManifestRecordsRemovesAll(t *testing.T) { 11 + pds := setupTestPDSWithIndex(t, "did:plc:owner") 12 + ctx := sharedCtx 13 + 14 + const userDID = "did:plc:alice" 15 + const manifestDigest = "sha256:e692418e4cbaf90ca69d05a66403747baa33ee08806650b51fab815ad7fc331f" 16 + manifestURI := atproto.BuildManifestURI(userDID, manifestDigest) 17 + 18 + // Two layer records for the same manifest plus one for a different one 19 + mustCreateLayer(t, pds, manifestURI, "sha256:layer-a", 1024) 20 + mustCreateLayer(t, pds, manifestURI, "sha256:layer-b", 2048) 21 + otherURI := atproto.BuildManifestURI(userDID, "sha256:0123456789abcdef") 22 + mustCreateLayer(t, pds, otherURI, "sha256:layer-c", 4096) 23 + 24 + // Scan record at the deterministic rkey 25 + scanRkey := atproto.ScanRecordKey(manifestDigest) 26 + scanRec := &atproto.ScanRecord{ 27 + Type: atproto.ScanCollection, 28 + Manifest: manifestURI, 29 + ScannedAt: "2026-05-02T00:00:00Z", 30 + } 31 + if _, _, _, err := pds.repomgr.UpsertRecord(ctx, pds.uid, atproto.ScanCollection, scanRkey, scanRec); err != nil { 32 + t.Fatalf("create scan record: %v", err) 33 + } 34 + 35 + // Image config record at the same deterministic rkey scheme 36 + cfgRec := &atproto.ImageConfigRecord{ 37 + Type: atproto.ImageConfigCollection, 38 + Manifest: manifestURI, 39 + } 40 + if _, _, _, err := pds.repomgr.UpsertRecord(ctx, pds.uid, atproto.ImageConfigCollection, scanRkey, cfgRec); err != nil { 41 + t.Fatalf("create image config record: %v", err) 42 + } 43 + 44 + res, err := pds.PurgeManifestRecords(ctx, manifestURI) 45 + if err != nil { 46 + t.Fatalf("PurgeManifestRecords: %v", err) 47 + } 48 + if res.LayersDeleted != 2 { 49 + t.Errorf("LayersDeleted = %d, want 2", res.LayersDeleted) 50 + } 51 + if !res.ScanDeleted { 52 + t.Errorf("ScanDeleted = false, want true") 53 + } 54 + if !res.ImageConfigDeleted { 55 + 
t.Errorf("ImageConfigDeleted = false, want true") 56 + } 57 + 58 + // Layer records for OTHER manifest must remain. 59 + rkeys, err := pds.listLayerRkeysForManifest(ctx, otherURI) 60 + if err != nil { 61 + t.Fatalf("list other layers: %v", err) 62 + } 63 + if len(rkeys) != 1 { 64 + t.Fatalf("other manifest layer rkeys = %d, want 1", len(rkeys)) 65 + } 66 + } 67 + 68 + func TestPurgeManifestRecordsIdempotent(t *testing.T) { 69 + pds := setupTestPDSWithIndex(t, "did:plc:owner") 70 + ctx := sharedCtx 71 + 72 + manifestURI := atproto.BuildManifestURI("did:plc:alice", "sha256:e692418e4cbaf90ca69d05a66403747baa33ee08806650b51fab815ad7fc331f") 73 + 74 + res, err := pds.PurgeManifestRecords(ctx, manifestURI) 75 + if err != nil { 76 + t.Fatalf("first PurgeManifestRecords: %v", err) 77 + } 78 + if res.LayersDeleted != 0 || res.ScanDeleted || res.ImageConfigDeleted { 79 + t.Fatalf("expected zero-result purge on empty hold, got %+v", res) 80 + } 81 + 82 + // Second call must also succeed (idempotent). 
83 + if _, err := pds.PurgeManifestRecords(ctx, manifestURI); err != nil { 84 + t.Fatalf("second PurgeManifestRecords: %v", err) 85 + } 86 + } 87 + 88 + func TestPurgeManifestRecordsRequiresURI(t *testing.T) { 89 + pds := setupTestPDSWithIndex(t, "did:plc:owner") 90 + if _, err := pds.PurgeManifestRecords(sharedCtx, ""); err == nil || !strings.Contains(err.Error(), "manifest URI is required") { 91 + t.Fatalf("expected manifest URI error, got %v", err) 92 + } 93 + } 94 + 95 + func TestPurgeUserManifestsCollectsAcrossManifests(t *testing.T) { 96 + pds := setupTestPDSWithIndex(t, "did:plc:owner") 97 + ctx := sharedCtx 98 + 99 + const userDID = "did:plc:alice" 100 + manifestA := atproto.BuildManifestURI(userDID, "sha256:e692418e4cbaf90ca69d05a66403747baa33ee08806650b51fab815ad7fc331f") 101 + manifestB := atproto.BuildManifestURI(userDID, "sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef") 102 + 103 + mustCreateLayer(t, pds, manifestA, "sha256:layer-a", 100) 104 + mustCreateLayer(t, pds, manifestA, "sha256:layer-b", 200) 105 + mustCreateLayer(t, pds, manifestB, "sha256:layer-c", 300) 106 + 107 + // Different user's layer must survive 108 + manifestOther := atproto.BuildManifestURI("did:plc:bob", "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb") 109 + mustCreateLayer(t, pds, manifestOther, "sha256:layer-other", 400) 110 + 111 + res, err := pds.PurgeUserManifests(ctx, userDID) 112 + if err != nil { 113 + t.Fatalf("PurgeUserManifests: %v", err) 114 + } 115 + if res.LayersDeleted != 3 { 116 + t.Errorf("LayersDeleted = %d, want 3", res.LayersDeleted) 117 + } 118 + 119 + bobRkeys, err := pds.listLayerRkeysForManifest(ctx, manifestOther) 120 + if err != nil { 121 + t.Fatalf("list bob's layers: %v", err) 122 + } 123 + if len(bobRkeys) != 1 { 124 + t.Fatalf("bob's layer count = %d, want 1 (purge crossed user boundary)", len(bobRkeys)) 125 + } 126 + } 127 + 128 + // mustCreateLayer is a tiny helper for purge tests — keeps the 
table-driven 129 + // TestPurge body focused on the assertions. 130 + func mustCreateLayer(t *testing.T, pds *HoldPDS, manifestURI, digest string, size int64) { 131 + t.Helper() 132 + rec := atproto.NewLayerRecord(digest, size, "application/vnd.oci.image.layer.v1.tar+gzip", didFromManifest(manifestURI), manifestURI) 133 + if _, _, err := pds.CreateLayerRecord(sharedCtx, rec); err != nil { 134 + t.Fatalf("CreateLayerRecord(%s): %v", digest, err) 135 + } 136 + } 137 + 138 + // didFromManifest pulls the authority out of an at:// URI for layer record 139 + // construction in tests. Production code uses richer helpers. 140 + func didFromManifest(uri string) string { 141 + const prefix = "at://" 142 + if !strings.HasPrefix(uri, prefix) { 143 + return "" 144 + } 145 + rest := uri[len(prefix):] 146 + if i := strings.IndexByte(rest, '/'); i >= 0 { 147 + return rest[:i] 148 + } 149 + return rest 150 + }
+37
pkg/hold/pds/xrpc.go
··· 203 203 r.Use(h.requireOwnerOrCrewAdmin) 204 204 r.Post(atproto.RepoDeleteRecord, h.HandleDeleteRecord) 205 205 r.Post(atproto.RepoUploadBlob, h.HandleUploadBlob) 206 + r.Post(atproto.HoldPurgeManifest, h.HandlePurgeManifest) 206 207 }) 207 208 208 209 // Auth-only endpoints (DPoP auth) ··· 888 889 "cid": head.String(), 889 890 "rev": rev, 890 891 }, 892 + }) 893 + } 894 + 895 + // HandlePurgeManifest deletes layer, scan, and image-config records associated 896 + // with a single manifest AT-URI. Idempotent. Auth: owner or crew admin 897 + // (enforced by middleware). The manifest record itself lives in the user's PDS 898 + // and is not affected. S3 blobs are not removed; the GC handles those based on 899 + // remaining references and the labeler grace window. 900 + func (h *XRPCHandler) HandlePurgeManifest(w http.ResponseWriter, r *http.Request) { 901 + var input struct { 902 + ManifestURI string `json:"manifestUri"` 903 + } 904 + if err := json.NewDecoder(r.Body).Decode(&input); err != nil { 905 + http.Error(w, fmt.Sprintf("invalid JSON body: %v", err), http.StatusBadRequest) 906 + return 907 + } 908 + if input.ManifestURI == "" { 909 + http.Error(w, "manifestUri is required", http.StatusBadRequest) 910 + return 911 + } 912 + if !strings.HasPrefix(input.ManifestURI, "at://") { 913 + http.Error(w, "manifestUri must be an at:// URI", http.StatusBadRequest) 914 + return 915 + } 916 + 917 + res, err := h.pds.PurgeManifestRecords(r.Context(), input.ManifestURI) 918 + if err != nil { 919 + http.Error(w, fmt.Sprintf("purge failed: %v", err), http.StatusInternalServerError) 920 + return 921 + } 922 + 923 + render.JSON(w, r, map[string]any{ 924 + "success": true, 925 + "layersDeleted": res.LayersDeleted, 926 + "scanDeleted": res.ScanDeleted, 927 + "imageConfigDeleted": res.ImageConfigDeleted, 891 928 }) 892 929 } 893 930
+75 -7
pkg/hold/server.go
··· 15 15 "atcr.io/pkg/hold/admin" 16 16 holddb "atcr.io/pkg/hold/db" 17 17 "atcr.io/pkg/hold/gc" 18 + holdlabeler "atcr.io/pkg/hold/labeler" 18 19 "atcr.io/pkg/hold/oci" 19 20 "atcr.io/pkg/hold/pds" 20 21 "atcr.io/pkg/hold/quota" ··· 25 26 "github.com/go-chi/chi/v5/middleware" 26 27 ) 27 28 29 + // purgerAdapter bridges *pds.HoldPDS to the holdlabeler.Purger interface, which 30 + // uses its own outcome type to avoid an import cycle with pkg/hold/pds. 31 + type purgerAdapter struct { 32 + pds *pds.HoldPDS 33 + } 34 + 35 + func (a purgerAdapter) PurgeManifestRecords(ctx context.Context, manifestURI string) (holdlabeler.PurgeOutcome, error) { 36 + r, err := a.pds.PurgeManifestRecords(ctx, manifestURI) 37 + if err != nil || r == nil { 38 + return holdlabeler.PurgeOutcome{}, err 39 + } 40 + return holdlabeler.PurgeOutcome{ 41 + LayersDeleted: r.LayersDeleted, 42 + ScanDeleted: r.ScanDeleted, 43 + ImageConfigDeleted: r.ImageConfigDeleted, 44 + }, nil 45 + } 46 + 47 + func (a purgerAdapter) PurgeUserManifests(ctx context.Context, userDID string) (holdlabeler.PurgeOutcome, error) { 48 + r, err := a.pds.PurgeUserManifests(ctx, userDID) 49 + if err != nil || r == nil { 50 + return holdlabeler.PurgeOutcome{}, err 51 + } 52 + return holdlabeler.PurgeOutcome{ 53 + LayersDeleted: r.LayersDeleted, 54 + ScanDeleted: r.ScanDeleted, 55 + ImageConfigDeleted: r.ImageConfigDeleted, 56 + }, nil 57 + } 58 + 28 59 // HoldServer is the hold service with an exposed router for extensibility. 29 60 // Consumers can add routes to Router before calling Serve(). 
30 61 type HoldServer struct { ··· 41 72 Config *Config 42 73 43 74 // internal fields for shutdown 44 - httpServer *http.Server 45 - broadcaster *pds.EventBroadcaster 46 - scanBroadcaster *pds.ScanBroadcaster 47 - garbageCollector *gc.GarbageCollector 48 - adminUI *admin.AdminUI 49 - holdDB *holddb.HoldDB // shared database connection (nil for :memory:) 75 + httpServer *http.Server 76 + broadcaster *pds.EventBroadcaster 77 + scanBroadcaster *pds.ScanBroadcaster 78 + garbageCollector *gc.GarbageCollector 79 + adminUI *admin.AdminUI 80 + holdDB *holddb.HoldDB // shared database connection (nil for :memory:) 81 + labelerSubscriber *holdlabeler.Subscriber 82 + labelerCache *holdlabeler.Cache 50 83 } 51 84 52 85 // NewHoldServer initializes PDS, storage, quota, XRPC handlers, and returns ··· 210 243 "rescanInterval", rescanInterval) 211 244 } 212 245 246 + // Initialize labeler cache + subscriber if a labeler is configured. 247 + // The cache is created either way so the GC can take a non-nil 248 + // pointer; without a configured DID it just stays empty and exerts 249 + // no effect. 
250 + if s.holdDB != nil { 251 + cache, cacheErr := holdlabeler.NewCache(s.holdDB.DB) 252 + if cacheErr != nil { 253 + return nil, fmt.Errorf("failed to initialize labeler cache: %w", cacheErr) 254 + } 255 + s.labelerCache = cache 256 + if cfg.Labeler.DID != "" { 257 + s.labelerSubscriber = holdlabeler.NewSubscriber( 258 + cfg.Labeler.DID, 259 + s.labelerCache, 260 + purgerAdapter{pds: s.PDS}, 261 + ) 262 + slog.Info("Hold labeler subscriber initialized", 263 + "labeler", cfg.Labeler.DID, 264 + "grace_window", cfg.Labeler.GraceWindow) 265 + } 266 + } 267 + 213 268 // Initialize garbage collector 214 - s.garbageCollector = gc.NewGarbageCollector(s.PDS, s3Service, cfg.GC) 269 + s.garbageCollector = gc.NewGarbageCollector(s.PDS, s3Service, cfg.GC, 270 + gc.WithTakedownCache(s.labelerCache, cfg.Labeler.GraceWindow)) 215 271 slog.Info("Garbage collector initialized", 216 272 "enabled", cfg.GC.Enabled) 217 273 } ··· 333 389 s.garbageCollector.Start(context.Background()) 334 390 } 335 391 392 + // Start labeler subscriber if configured. 393 + if s.labelerSubscriber != nil { 394 + s.labelerSubscriber.Start() 395 + slog.Info("Hold labeler subscriber started", "labeler_did", s.labelerSubscriber.LabelerDID()) 396 + } 397 + 336 398 // Wait for signal or server error 337 399 select { 338 400 case err := <-serverErr: ··· 362 424 if s.garbageCollector != nil { 363 425 s.garbageCollector.Stop() 364 426 slog.Info("Garbage collector stopped") 427 + } 428 + 429 + // Stop labeler subscriber 430 + if s.labelerSubscriber != nil { 431 + s.labelerSubscriber.Stop() 432 + slog.Info("Labeler subscriber stopped") 365 433 } 366 434 367 435 // Close scan broadcaster database connection
+1 -1
pkg/labeler/handlers.go
··· 21 21 } 22 22 23 23 errorMsg := r.URL.Query().Get("error") 24 - w.Header().Set("Content-Type", "text/html") 24 + w.Header().Set("Content-Type", "text/html; charset=utf-8") 25 25 fmt.Fprintf(w, `<!DOCTYPE html> 26 26 <html> 27 27 <head><title>%s Labeler - Login</title>
+94 -10
pkg/labeler/takedown.go
··· 405 405 return 406 406 } 407 407 408 + // Pre-fetch labels per visible takedown so the expand-in-place rows can render 409 + // without a round-trip. N+1 queries are fine here — both lists are paginated to 410 + // 50, this is admin-only, and it keeps the UI JS-light (just a toggle). 411 + labelsByTakedown := make(map[int64][]Label, len(active)+len(reversed)) 412 + for _, t := range append(append([]Takedown{}, active...), reversed...) { 413 + labels, err := GetLabelsByTakedown(s.db, t.ID) 414 + if err != nil { 415 + slog.Warn("Failed to load labels for takedown", "takedown_id", t.ID, "error", err) 416 + continue 417 + } 418 + labelsByTakedown[t.ID] = labels 419 + } 420 + 408 421 csrf := "" 409 422 if session := SessionFromContext(r.Context()); session != nil { 410 423 csrf = session.CSRFToken 411 424 } 412 425 413 - w.Header().Set("Content-Type", "text/html") 426 + w.Header().Set("Content-Type", "text/html; charset=utf-8") 414 427 fmt.Fprintf(w, `<!DOCTYPE html> 415 428 <html> 416 429 <head><title>%s Labeler</title> ··· 427 440 form{display:inline} 428 441 code{background:#f4f4f5;padding:1px 4px;border-radius:3px} 429 442 .reason{max-width:280px;white-space:pre-wrap} 443 + .toggle{background:none;border:1px solid #d4d4d8;border-radius:4px;padding:2px 8px;font:inherit;cursor:pointer;color:#374151} 444 + .toggle:hover{background:#f4f4f5} 445 + .toggle .caret{display:inline-block;transition:transform .15s ease;margin-right:4px} 446 + .toggle[aria-expanded="true"] .caret{transform:rotate(90deg)} 447 + .detail-row td{background:#fafafa;padding:0} 448 + .detail-row .inner{padding:12px 16px} 449 + .label-list{width:100%%;border-collapse:collapse;font-size:0.88em} 450 + .label-list th,.label-list td{border-bottom:1px solid #eee;padding:4px 8px;background:#fafafa} 451 + .label-list th{background:#f1f1f3} 452 + .tag{display:inline-block;padding:1px 6px;border-radius:3px;font-size:0.85em} 453 + .tag-active{background:#fee2e2;color:#991b1b} 454 + 
.tag-neg{background:#dcfce7;color:#166534} 430 455 </style> 431 456 </head> 432 457 <body> ··· 442 467 activeTotal, 443 468 ) 444 469 445 - renderTakedownRows(w, active, csrf, true) 470 + renderTakedownRows(w, active, labelsByTakedown, csrf, true) 446 471 447 472 fmt.Fprintf(w, `<h2>Reversed (%d)</h2>`, reversedTotal) 448 - renderTakedownRows(w, reversed, csrf, false) 473 + renderTakedownRows(w, reversed, labelsByTakedown, csrf, false) 449 474 450 - fmt.Fprint(w, `</body></html>`) 475 + // Tiny inline toggle: flips [hidden] on the sibling detail row and the 476 + // aria-expanded attribute on the button (which the .caret CSS rotates). 477 + fmt.Fprint(w, `<script> 478 + document.addEventListener('click', function(e) { 479 + var btn = e.target.closest('.toggle[data-target]'); 480 + if (!btn) return; 481 + var row = document.getElementById(btn.dataset.target); 482 + if (!row) return; 483 + var open = row.hasAttribute('hidden'); 484 + if (open) { row.removeAttribute('hidden'); } else { row.setAttribute('hidden', ''); } 485 + btn.setAttribute('aria-expanded', open ? 'true' : 'false'); 486 + }); 487 + </script> 488 + </body></html>`) 451 489 } 452 490 453 491 // renderTakedownRows writes either an active table (with a Reverse button) or a 454 - // reversed-history table (with a reversed-at column instead). 455 - func renderTakedownRows(w http.ResponseWriter, ts []Takedown, csrf string, withReverse bool) { 492 + // reversed-history table (with a reversed-at column instead). Each main row is 493 + // followed by a hidden detail row that the inline JS toggles to show the labels 494 + // linked to that takedown. 
495 + func renderTakedownRows(w http.ResponseWriter, ts []Takedown, labelsByID map[int64][]Label, csrf string, withReverse bool) { 456 496 if len(ts) == 0 { 457 497 if withReverse { 458 498 fmt.Fprint(w, `<p class="muted">No active takedowns.</p>`) ··· 461 501 } 462 502 return 463 503 } 504 + const totalCols = 6 464 505 fmt.Fprint(w, `<table><tr><th>Input</th><th>Subject</th><th>Reason</th><th>Labels</th><th>Created</th>`) 465 506 if withReverse { 466 507 fmt.Fprint(w, `<th>Action</th>`) ··· 503 544 lastCol = rev + by 504 545 } 505 546 547 + detailID := fmt.Sprintf("td-%d-detail", t.ID) 506 548 fmt.Fprintf(w, `<tr> 507 549 <td><code>%s</code></td> 508 550 <td>%s</td> 509 551 <td class="reason">%s</td> 510 - <td>%d</td> 552 + <td><button type="button" class="toggle" data-target="%s" aria-expanded="false" aria-controls="%s"><span class="caret">▶</span>%d</button></td> 511 553 <td>%s</td> 512 554 <td>%s</td> 513 - </tr>`, 555 + </tr> 556 + <tr class="detail-row" id="%s" hidden><td colspan="%d"><div class="inner">%s</div></td></tr>`, 514 557 template.HTMLEscapeString(t.Input), 515 558 subject, 516 559 reason, 517 - t.LabelCount, 560 + detailID, detailID, t.LabelCount, 518 561 t.CreatedAt.Format("2006-01-02 15:04"), 519 562 lastCol, 563 + detailID, totalCols, renderLabelList(labelsByID[t.ID]), 520 564 ) 521 565 } 522 566 fmt.Fprint(w, `</table>`) 523 567 } 524 568 569 + // renderLabelList returns an HTML fragment listing every label linked to a takedown, 570 + // marking each as active (neg=0 with no later neg=1 row) or negated. Pure string 571 + // build-up so it can be embedded inside a <td> via fmt.Fprintf. 572 + func renderLabelList(labels []Label) string { 573 + if len(labels) == 0 { 574 + return `<span class="muted">No labels recorded for this takedown.</span>` 575 + } 576 + 577 + // Compute which positive labels have been overridden by a later negation row 578 + // (same URI). 
Used to badge the "active" vs "negated" state correctly even 579 + // when the takedown row itself is still marked active. 580 + negatedURIs := make(map[string]bool, len(labels)) 581 + for _, l := range labels { 582 + if l.Neg { 583 + negatedURIs[l.URI] = true 584 + } 585 + } 586 + 587 + var b strings.Builder 588 + b.WriteString(`<table class="label-list"><tr><th>State</th><th>URI</th><th>Created</th></tr>`) 589 + for _, l := range labels { 590 + var tag string 591 + switch { 592 + case l.Neg: 593 + tag = `<span class="tag tag-neg">negation</span>` 594 + case negatedURIs[l.URI]: 595 + tag = `<span class="tag tag-neg">negated</span>` 596 + default: 597 + tag = `<span class="tag tag-active">active</span>` 598 + } 599 + fmt.Fprintf(&b, `<tr><td>%s</td><td><code>%s</code></td><td>%s</td></tr>`, 600 + tag, 601 + template.HTMLEscapeString(l.URI), 602 + l.Cts.Format("2006-01-02 15:04"), 603 + ) 604 + } 605 + b.WriteString(`</table>`) 606 + return b.String() 607 + } 608 + 525 609 func (s *Server) handleTakedownForm(w http.ResponseWriter, r *http.Request) { 526 610 msg := r.URL.Query().Get("msg") 527 611 errorMsg := r.URL.Query().Get("error") ··· 530 614 csrf = session.CSRFToken 531 615 } 532 616 533 - w.Header().Set("Content-Type", "text/html") 617 + w.Header().Set("Content-Type", "text/html; charset=utf-8") 534 618 fmt.Fprintf(w, `<!DOCTYPE html> 535 619 <html> 536 620 <head><title>%s Labeler - New Takedown</title>