···11941194 return records, rows.Err()
11951195}
1196119611971197+// CountReferencesToURI returns how many records have a sourceRef pointing to the given URI.
11981198+// This searches the JSON record field across all collections.
11991199+func (idx *FeedIndex) CountReferencesToURI(ctx context.Context, uri string) (int, error) {
12001200+ var count int
12011201+ err := idx.db.QueryRowContext(ctx, `
12021202+ SELECT COUNT(*) FROM records
12031203+ WHERE json_extract(record, '$.sourceRef') = ?
12041204+ `, uri).Scan(&count)
12051205+ return count, err
12061206+}
12071207+11971208// RecordCount returns the total number of indexed records
11981209func (idx *FeedIndex) RecordCount() int {
11991210 var count int
+32-7
internal/suggestions/suggestions.go
···2222// RecordSource provides read access to indexed records.
2323type RecordSource interface {
2424 ListRecordsByCollectionOldest(ctx context.Context, collection string) ([]firehose.IndexedRecord, error)
2525+ CountReferencesToURI(ctx context.Context, uri string) (int, error)
2526}
// PreferredDIDs is the set of DIDs whose records are favoured when the
// representative sourceRef of a suggestion is chosen: a record authored by
// one of these DIDs earns a scoring bonus during deduplication. The set
// starts out empty.
//
// It is a plain map with no locking of its own.
var PreferredDIDs = make(map[string]struct{})
26322733// entityFieldConfig defines which fields to extract and search for each entity type
2834type entityFieldConfig struct {
···238244 // dedupKey -> aggregated suggestion
239245 type candidate struct {
240246 suggestion EntitySuggestion
241241- fieldCount int // number of non-empty fields (to pick best representative)
247247+ score int // composite score for picking best representative
242248 dids map[string]struct{}
243249 }
244250 candidates := make(map[string]*candidate)
···286292 // Deduplicate using entity-specific key
287293 key := config.dedupKey(fields)
288294295295+ score := scoreRecord(ctx, source, indexed.URI, indexed.DID, fields)
296296+289297 if existing, ok := candidates[key]; ok {
290298 existing.dids[indexed.DID] = struct{}{}
291291- // Keep the record with more complete fields
292292- nonEmpty := countNonEmpty(fields)
293293- if nonEmpty > existing.fieldCount {
299299+ if score > existing.score {
294300 existing.suggestion.Name = name
295301 existing.suggestion.Fields = fields
296302 existing.suggestion.SourceURI = indexed.URI
297297- existing.fieldCount = nonEmpty
303303+ existing.score = score
298304 }
299305 } else {
300306 candidates[key] = &candidate{
···303309 SourceURI: indexed.URI,
304310 Fields: fields,
305311 },
306306- fieldCount: countNonEmpty(fields),
307307- dids: map[string]struct{}{indexed.DID: {}},
312312+ score: score,
313313+ dids: map[string]struct{}{indexed.DID: {}},
308314 }
309315 }
310316 }
···334340 }
335341336342 return results, nil
343343+}
344344+345345+// scoreRecord computes a composite score for choosing the best representative
346346+// record within a dedup group. Higher score wins. Factors:
347347+// - Field completeness (1 point per non-empty field)
348348+// - Reference count (2 points per record that references this URI via sourceRef)
349349+// - Preferred DID bonus (10 points if the record's author is in PreferredDIDs)
350350+func scoreRecord(ctx context.Context, source RecordSource, uri, did string, fields map[string]string) int {
351351+ score := countNonEmpty(fields)
352352+353353+ if refCount, err := source.CountReferencesToURI(ctx, uri); err == nil {
354354+ score += refCount * 2
355355+ }
356356+357357+ if _, ok := PreferredDIDs[did]; ok {
358358+ score += 10
359359+ }
360360+361361+ return score
337362}
338363339364func countNonEmpty(fields map[string]string) int {
+97
tests/integration/suggestions_test.go
···88 "testing"
991010 "arabica/internal/models"
1111+ "arabica/internal/suggestions"
11121213 "github.com/stretchr/testify/assert"
1314 "github.com/stretchr/testify/require"
···5960 var results []suggestionResult
6061 require.NoError(t, json.Unmarshal([]byte(body), &results))
6162 return results
6363+}
// roasterURI returns the AT-URI of the roaster record with the given rkey in
// the repository of did, in the form at://<did>/<roaster collection>/<rkey>.
func roasterURI(did, rkey string) string {
	const roasterCollection = "social.arabica.alpha.roaster"

	uri := "at://" + did
	uri += "/" + roasterCollection
	uri += "/" + rkey
	return uri
}
6969+7070+// TestHTTP_SuggestionScoring_PrefersReferenced verifies the sourceRef selection
7171+// logic end-to-end: when a roaster has been "adopted" by other users (their
7272+// records carry source_ref pointing back at the original), that original URI
7373+// should win as the canonical sourceRef in the suggestions response.
7474+//
7575+// This is the original motivating scenario for the scoring work — exercises
7676+// witness cache writes, the json_extract reference query, and the composite
7777+// score function all together.
7878+func TestHTTP_SuggestionScoring_PrefersReferenced(t *testing.T) {
7979+ h := StartHarness(t, nil)
8080+8181+ // Four users total: alice/bob/carol contribute, dave queries.
8282+ bob := h.CreateAccount("bob@test.com", "bob.test", "hunter2")
8383+ carol := h.CreateAccount("carol@test.com", "carol.test", "hunter2")
8484+ dave := h.CreateAccount("dave@test.com", "dave.test", "hunter2")
8585+8686+ aliceClient := h.Client // primary == alice
8787+ bobClient := h.NewClientForAccount(bob)
8888+ carolClient := h.NewClientForAccount(carol)
8989+ daveClient := h.NewClientForAccount(dave)
9090+ alice := h.PrimaryAccount
9191+9292+ // Alice creates the canonical "Counter Culture" roaster (no sourceRef).
9393+ aliceRoaster := postRoasterAs(t, h, aliceClient, "Counter Culture", "Durham, NC", "")
9494+ aliceURI := roasterURI(alice.DID, aliceRoaster.RKey)
9595+9696+ // Bob and Carol both adopt Alice's roaster — i.e. they create their own
9797+ // records with source_ref pointing at her URI. Each adoption gives Alice
9898+ // +2 points in the score function (refCount * 2).
9999+ postRoasterAs(t, h, bobClient, "Counter Culture Coffee", "Durham, NC", aliceURI)
100100+ postRoasterAs(t, h, carolClient, "Counter Culture", "Durham, NC", aliceURI)
101101+102102+ // Dave queries suggestions. The three contributing roasters dedupe into
103103+ // one candidate, and Alice's URI should win because two records reference it.
104104+ results := fetchSuggestions(t, h, daveClient, "roasters", "counter")
105105+ require.NotEmpty(t, results, "expected at least one suggestion")
106106+107107+ var cc *suggestionResult
108108+ for i := range results {
109109+ if strings.Contains(strings.ToLower(results[i].Name), "counter culture") {
110110+ cc = &results[i]
111111+ break
112112+ }
113113+ }
114114+ require.NotNil(t, cc, "expected a Counter Culture suggestion in results")
115115+116116+ assert.Equal(t, 3, cc.Count, "all three contributing users should be counted")
117117+ assert.Equal(t, aliceURI, cc.SourceURI,
118118+ "alice's roaster (referenced by 2 others) should win as the canonical sourceRef")
119119+}
120120+121121+// TestHTTP_SuggestionScoring_PreferredDIDOverride verifies that a DID added to
122122+// suggestions.PreferredDIDs wins over a record with more references.
123123+func TestHTTP_SuggestionScoring_PreferredDIDOverride(t *testing.T) {
124124+ h := StartHarness(t, nil)
125125+126126+ bob := h.CreateAccount("bob@test.com", "bob.test", "hunter2")
127127+ carol := h.CreateAccount("carol@test.com", "carol.test", "hunter2")
128128+ dave := h.CreateAccount("dave@test.com", "dave.test", "hunter2")
129129+130130+ aliceClient := h.Client
131131+ bobClient := h.NewClientForAccount(bob)
132132+ carolClient := h.NewClientForAccount(carol)
133133+ daveClient := h.NewClientForAccount(dave)
134134+ alice := h.PrimaryAccount
135135+136136+ // Alice creates the canonical roaster, Bob and Carol adopt it.
137137+ aliceRoaster := postRoasterAs(t, h, aliceClient, "Counter Culture", "Durham, NC", "")
138138+ aliceURI := roasterURI(alice.DID, aliceRoaster.RKey)
139139+ bobRoaster := postRoasterAs(t, h, bobClient, "Counter Culture Coffee", "Durham, NC", aliceURI)
140140+ postRoasterAs(t, h, carolClient, "Counter Culture", "Durham, NC", aliceURI)
141141+142142+ // Mark Bob as preferred. The +10 bonus should overcome Alice's +4 from refs.
143143+ suggestions.PreferredDIDs[bob.DID] = struct{}{}
144144+ t.Cleanup(func() { delete(suggestions.PreferredDIDs, bob.DID) })
145145+146146+ results := fetchSuggestions(t, h, daveClient, "roasters", "counter")
147147+ var cc *suggestionResult
148148+ for i := range results {
149149+ if strings.Contains(strings.ToLower(results[i].Name), "counter culture") {
150150+ cc = &results[i]
151151+ break
152152+ }
153153+ }
154154+ require.NotNil(t, cc)
155155+156156+ bobURI := roasterURI(bob.DID, bobRoaster.RKey)
157157+ assert.Equal(t, bobURI, cc.SourceURI,
158158+ "bob's roaster should win because his DID is in PreferredDIDs")
62159}
6316064161// TestHTTP_SuggestionDedupe verifies that when multiple users post a roaster