···11+description: Replace labels (append-only mirror) with taken_down_subjects + labeler_cursor (current-state model)
22+query: |
33+ DROP TABLE IF EXISTS labels;
44+ CREATE TABLE IF NOT EXISTS taken_down_subjects (
55+ src TEXT NOT NULL,
66+ did TEXT NOT NULL,
77+ repo TEXT NOT NULL DEFAULT '',
88+ cts TIMESTAMP NOT NULL,
99+ PRIMARY KEY (src, did, repo)
1010+ );
1111+ CREATE INDEX IF NOT EXISTS idx_taken_down_subjects_did ON taken_down_subjects(did);
1212+ CREATE INDEX IF NOT EXISTS idx_taken_down_subjects_did_repo ON taken_down_subjects(did, repo);
1313+ CREATE TABLE IF NOT EXISTS labeler_cursor (
1414+ src TEXT PRIMARY KEY,
1515+ cursor INTEGER NOT NULL
1616+ );
+5-17
pkg/appview/db/queries.go
···1919// activeTakedownClause returns a SQL fragment ready to drop into a `WHERE NOT
2020// EXISTS (...)` filter for excluding rows whose `(did, repository)` pair is currently
2121// taken down. The `alias` argument is the outer table alias (e.g. "m" for manifests,
2222-// "lm" for latest_manifests) and must already be in scope at the use site.
2323-//
2424-// Mirrors the semantics of `IsTakenDown` (defined in labels.go) so listings stay
2525-// consistent with the per-repo page check: a label only counts as active when it has
2626-// neg=0, no newer neg=1 row with the same (src, uri, val), and a non-expired `exp`.
2727-// Without these clauses listings hide a repo forever once you've ever taken it down,
2828-// even after a reversal.
2222+// "lm" for latest_manifests) and must already be in scope at the use site. Mirrors
2323+// `IsTakenDown` so listings stay consistent with the per-repo page check.
2924func activeTakedownClause(alias string) string {
3025 return `NOT EXISTS (
3131- SELECT 1 FROM labels l1
3232- WHERE l1.subject_did = ` + alias + `.did
3333- AND (l1.subject_repo = ` + alias + `.repository OR l1.subject_repo = '')
3434- AND l1.val = '!takedown' AND l1.neg = 0
3535- AND NOT EXISTS (
3636- SELECT 1 FROM labels l2
3737- WHERE l2.src = l1.src AND l2.uri = l1.uri AND l2.val = l1.val
3838- AND l2.neg = 1 AND l2.id > l1.id
3939- )
4040- AND (l1.exp IS NULL OR datetime(l1.exp) > CURRENT_TIMESTAMP)
2626+ SELECT 1 FROM taken_down_subjects t
2727+ WHERE t.did = ` + alias + `.did
2828+ AND (t.repo = ` + alias + `.repository OR t.repo = '')
4129 )`
4230}
4331
+11-12
pkg/appview/db/schema.sql
···299299 created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
300300);
301301302302-CREATE TABLE IF NOT EXISTS labels (
303303- id INTEGER PRIMARY KEY AUTOINCREMENT,
302302+CREATE TABLE IF NOT EXISTS taken_down_subjects (
304303 src TEXT NOT NULL,
305305- uri TEXT NOT NULL,
306306- val TEXT NOT NULL,
307307- neg BOOLEAN NOT NULL DEFAULT 0,
304304+ did TEXT NOT NULL,
305305+ repo TEXT NOT NULL DEFAULT '',
308306 cts TIMESTAMP NOT NULL,
309309- exp TIMESTAMP,
310310- subject_did TEXT NOT NULL,
311311- subject_repo TEXT NOT NULL DEFAULT '',
312312- seq INTEGER NOT NULL DEFAULT 0,
313313- UNIQUE(src, uri, val, neg)
307307+ PRIMARY KEY (src, did, repo)
308308+);
309309+CREATE INDEX IF NOT EXISTS idx_taken_down_subjects_did ON taken_down_subjects(did);
310310+CREATE INDEX IF NOT EXISTS idx_taken_down_subjects_did_repo ON taken_down_subjects(did, repo);
311311+312312+CREATE TABLE IF NOT EXISTS labeler_cursor (
313313+ src TEXT PRIMARY KEY,
314314+ cursor INTEGER NOT NULL
314315);
315315-CREATE INDEX IF NOT EXISTS idx_labels_subject ON labels(subject_did, subject_repo);
316316-CREATE INDEX IF NOT EXISTS idx_labels_val ON labels(val);
+106-74
pkg/appview/labeler/subscriber.go
···1919 "github.com/gorilla/websocket"
2020)
21212222-// Subscriber connects to a labeler's subscribeLabels endpoint
2323-// and mirrors labels into the appview database.
2222+// TakedownLabelValue is the only label value the appview honors.
2323+const TakedownLabelValue = "!takedown"
2424+2525+// Subscriber connects to a labeler's subscribeLabels endpoint and mirrors
2626+// the current set of active takedowns into the appview database.
2427type Subscriber struct {
2528 labelerURL string
2929+ labelerDID string
2630 database *sql.DB
2731 stopCh chan struct{}
2832}
29333030-// NewSubscriber creates a new labeler subscriber.
3131-func NewSubscriber(labelerURL string, database *sql.DB) *Subscriber {
3434+// NewSubscriber creates a new labeler subscriber. labelerDIDOrURL is the
3535+// original config value (used to preserve a configured did:web identifier
3636+// when present); labelerURL is the resolved HTTP(S) endpoint.
3737+func NewSubscriber(labelerDIDOrURL, labelerURL string, database *sql.DB) *Subscriber {
3238 return &Subscriber{
3339 labelerURL: labelerURL,
4040+ labelerDID: deriveLabelerDID(labelerDIDOrURL, labelerURL),
3441 database: database,
3542 stopCh: make(chan struct{}),
3643 }
···7683}
77847885func (s *Subscriber) connect() error {
7979- // Get cursor from DB
8080- // Use the labeler URL as src identifier
8181- labelerDID := extractDIDFromURL(s.labelerURL)
8282- cursor, err := db.GetLabelCursor(s.database, labelerDID)
8686+ cursor, err := db.GetCursor(s.database, s.labelerDID)
8387 if err != nil {
8488 return fmt.Errorf("failed to get cursor: %w", err)
8589 }
86908787- // Build WebSocket URL
8891 wsURL := toWebSocketURL(s.labelerURL) + "/xrpc/com.atproto.label.subscribeLabels"
8992 if cursor > 0 {
9093 wsURL += fmt.Sprintf("?cursor=%d", cursor)
···120123 seq, labels, err := decodeFrame(payload)
121124 if err != nil {
122125 if errors.Is(err, errInfoFrame) {
123123- continue // already logged inside decodeFrame
126126+ continue
124127 }
125128 return fmt.Errorf("decode frame: %w", err)
126129 }
127130128131 for _, le := range labels {
129129- cts, _ := time.Parse(time.RFC3339, le.Cts)
130130- did, repo := extractSubjectFromURI(le.Uri)
132132+ s.applyLabel(le)
133133+ }
131134132132- // Exp is optional in the ATProto label spec — treat unparseable
133133- // values as "no expiration" rather than dropping the label.
134134- var exp *time.Time
135135- if le.Exp != nil {
136136- if t, err := time.Parse(time.RFC3339, *le.Exp); err == nil {
137137- exp = &t
138138- }
139139- }
135135+ if err := db.SetCursor(s.database, s.labelerDID, seq); err != nil {
136136+ slog.Warn("Failed to persist labeler cursor", "seq", seq, "error", err)
137137+ }
138138+ }
139139+}
140140141141- label := &db.Label{
142142- Src: le.Src,
143143- URI: le.Uri,
144144- Val: le.Val,
145145- Neg: le.Neg != nil && *le.Neg,
146146- Cts: cts,
147147- Exp: exp,
148148- SubjectDID: did,
149149- SubjectRepo: repo,
150150- Seq: seq,
151151- }
141141+// applyLabel processes a single label. The appview only honors !takedown labels
142142+// from the configured labeler, and only at the granularity it can enforce —
143143+// user-level (at://<did>) and repo summary (at://<did>/io.atcr.repo/<repo>).
144144+// Per-record labels (per manifest, tag, repo-page) are dropped; the registry
145145+// middleware gates per (did, repo) so finer granularity has no effect.
146146+func (s *Subscriber) applyLabel(le *comatproto.LabelDefs_Label) {
147147+ if le == nil {
148148+ return
149149+ }
150150+ if le.Val != TakedownLabelValue {
151151+ return
152152+ }
153153+ if le.Src != s.labelerDID {
154154+ slog.Debug("Ignoring label from untrusted source", "src", le.Src, "uri", le.Uri)
155155+ return
156156+ }
152157153153- if err := db.UpsertLabel(s.database, label); err != nil {
154154- slog.Warn("Failed to upsert label", "uri", le.Uri, "error", err)
155155- continue
156156- }
158158+ shape := classifyURI(le.Uri)
159159+ if shape.kind == uriOther {
160160+ slog.Debug("Skipping non-enforced label", "uri", le.Uri)
161161+ return
162162+ }
157163158158- // "Mirrored label X" reads as an apply; reversals are a different action
159159- // from the operator's POV (and a different SQL effect — the NOT EXISTS
160160- // negation clause kicks in), so log them distinctly.
161161- msg := "Mirrored label"
162162- if label.Neg {
163163- msg = "Mirrored label reversal"
164164- }
165165- slog.Info(msg,
166166- "uri", le.Uri,
167167- "val", le.Val,
168168- "neg", label.Neg,
169169- "subject_did", did,
170170- "subject_repo", repo,
171171- )
164164+ negated := le.Neg != nil && *le.Neg
165165+ if negated {
166166+ if err := db.RemoveTakedown(s.database, le.Src, shape.did, shape.repo); err != nil {
167167+ slog.Warn("Failed to remove takedown", "uri", le.Uri, "error", err)
168168+ return
172169 }
170170+ slog.Info("Mirrored takedown reversal",
171171+ "src", le.Src, "did", shape.did, "repo", shape.repo)
172172+ return
173173 }
174174+175175+ cts, _ := time.Parse(time.RFC3339, le.Cts)
176176+ if err := db.SetTakedown(s.database, le.Src, shape.did, shape.repo, cts); err != nil {
177177+ slog.Warn("Failed to record takedown", "uri", le.Uri, "error", err)
178178+ return
179179+ }
180180+ slog.Info("Mirrored takedown",
181181+ "src", le.Src, "did", shape.did, "repo", shape.repo)
182182+}
// uriKind enumerates the label subject shapes the appview distinguishes.
type uriKind int

const (
	// uriOther is the zero value: anything that is neither a user-level
	// subject nor a repo summary (including strings that are not AT URIs).
	uriOther uriKind = iota
	// uriUserLevel is a bare at://<did> subject.
	uriUserLevel
	// uriRepoSummary is an at://<did>/io.atcr.repo/<repo> subject.
	uriRepoSummary
)

// uriShape is the result of classifying a label subject URI: which kind it
// is, plus the DID and (for repo summaries only) the repository name.
type uriShape struct {
	kind uriKind
	did  string
	repo string
}

// classifyURI sorts a label subject URI into one of three buckets:
//
//	at://<did>                      -> uriUserLevel, did set
//	at://<did>/io.atcr.repo/<repo>  -> uriRepoSummary, did and repo set
//	any other at://<did>/...        -> uriOther, did set
//
// A string without the at:// scheme, or with an empty authority, yields the
// zero uriShape. Everything after "io.atcr.repo/" is taken as the repo name,
// including any further slashes.
func classifyURI(uri string) uriShape {
	afterScheme, isAT := strings.CutPrefix(uri, "at://")
	if !isAT {
		return uriShape{}
	}

	authority, path, hasPath := strings.Cut(afterScheme, "/")
	switch {
	case authority == "":
		return uriShape{}
	case !hasPath:
		return uriShape{kind: uriUserLevel, did: authority}
	}

	collection, name, hasName := strings.Cut(path, "/")
	if hasName && collection == "io.atcr.repo" && name != "" {
		return uriShape{kind: uriRepoSummary, did: authority, repo: name}
	}
	return uriShape{kind: uriOther, did: authority}
}
175221176222// errInfoFrame is returned by decodeFrame when the frame is informational and the
···221267 }
222268}
223269224224-// extractSubjectFromURI extracts the DID and repository from an AT URI.
225225-// Examples:
226226-//
227227-// at://did:plc:xyz → (did:plc:xyz, "")
228228-// at://did:plc:xyz/io.atcr.manifest/abc → (did:plc:xyz, "") - repo extracted from record
229229-// at://did:plc:xyz/io.atcr.repo/myimage → (did:plc:xyz, "myimage")
230230-func extractSubjectFromURI(uri string) (did, repo string) {
231231- trimmed := strings.TrimPrefix(uri, "at://")
232232- parts := strings.SplitN(trimmed, "/", 3)
233233- if len(parts) == 0 {
234234- return "", ""
270270+// deriveLabelerDID returns the canonical labeler DID for source filtering.
271271+// When the operator gave us a did:... identifier directly, we use it as-is.
272272+// When they gave us a URL, we derive a did:web from its host so dev URLs
273273+// like http://labeler:5002 yield did:web:labeler%3A5002, matching the
274274+// labeler's own self-served identity.
275275+func deriveLabelerDID(labelerDIDOrURL, httpURL string) string {
276276+ if strings.HasPrefix(labelerDIDOrURL, "did:") {
277277+ return labelerDIDOrURL
235278 }
236236- did = parts[0]
237237-238238- // For repo-level summary labels: at://did/io.atcr.repo/reponame
239239- if len(parts) >= 3 && parts[1] == "io.atcr.repo" {
240240- repo = parts[2]
241241- }
242242- return did, repo
243243-}
244244-245245-// extractDIDFromURL derives a did:web from a labeler URL.
246246-func extractDIDFromURL(labelerURL string) string {
247247- u, err := url.Parse(labelerURL)
279279+ u, err := url.Parse(httpURL)
248280 if err != nil {
249249- return labelerURL
281281+ return labelerDIDOrURL
250282 }
251283 host := u.Hostname()
252284 if port := u.Port(); port != "" {
···290322 return nil
291323 }
292324 labelerURL := ParseLabelerURL(labelerDIDOrURL)
293293- return NewSubscriber(labelerURL, database)
325325+ return NewSubscriber(labelerDIDOrURL, labelerURL, database)
294326}