Coffee journaling on ATProto (alpha) alpha.arabica.social
coffee
17
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: preferred DIDs for source refs, prefer more links

authored by

Patrick Dewey and committed by tangled.org 9d289b35 1a524b64

+140 -7
+11
internal/firehose/index.go
··· 1194 1194 return records, rows.Err() 1195 1195 } 1196 1196 1197 + // CountReferencesToURI returns how many records have a sourceRef pointing to the given URI. 1198 + // This searches the JSON record field across all collections. 1199 + func (idx *FeedIndex) CountReferencesToURI(ctx context.Context, uri string) (int, error) { 1200 + var count int 1201 + err := idx.db.QueryRowContext(ctx, ` 1202 + SELECT COUNT(*) FROM records 1203 + WHERE json_extract(record, '$.sourceRef') = ? 1204 + `, uri).Scan(&count) 1205 + return count, err 1206 + } 1207 + 1197 1208 // RecordCount returns the total number of indexed records 1198 1209 func (idx *FeedIndex) RecordCount() int { 1199 1210 var count int
+32 -7
internal/suggestions/suggestions.go
··· 22 22 // RecordSource provides read access to indexed records. 23 23 type RecordSource interface { 24 24 ListRecordsByCollectionOldest(ctx context.Context, collection string) ([]firehose.IndexedRecord, error) 25 + CountReferencesToURI(ctx context.Context, uri string) (int, error) 25 26 } 27 + 28 + // PreferredDIDs is a set of DIDs whose records should be preferred when 29 + // choosing the representative sourceRef for a suggestion. Records from 30 + // preferred DIDs get a scoring bonus during deduplication. 31 + var PreferredDIDs = map[string]struct{}{} 26 32 27 33 // entityFieldConfig defines which fields to extract and search for each entity type 28 34 type entityFieldConfig struct { ··· 238 244 // dedupKey -> aggregated suggestion 239 245 type candidate struct { 240 246 suggestion EntitySuggestion 241 - fieldCount int // number of non-empty fields (to pick best representative) 247 + score int // composite score for picking best representative 242 248 dids map[string]struct{} 243 249 } 244 250 candidates := make(map[string]*candidate) ··· 286 292 // Deduplicate using entity-specific key 287 293 key := config.dedupKey(fields) 288 294 295 + score := scoreRecord(ctx, source, indexed.URI, indexed.DID, fields) 296 + 289 297 if existing, ok := candidates[key]; ok { 290 298 existing.dids[indexed.DID] = struct{}{} 291 - // Keep the record with more complete fields 292 - nonEmpty := countNonEmpty(fields) 293 - if nonEmpty > existing.fieldCount { 299 + if score > existing.score { 294 300 existing.suggestion.Name = name 295 301 existing.suggestion.Fields = fields 296 302 existing.suggestion.SourceURI = indexed.URI 297 - existing.fieldCount = nonEmpty 303 + existing.score = score 298 304 } 299 305 } else { 300 306 candidates[key] = &candidate{ ··· 303 309 SourceURI: indexed.URI, 304 310 Fields: fields, 305 311 }, 306 - fieldCount: countNonEmpty(fields), 307 - dids: map[string]struct{}{indexed.DID: {}}, 312 + score: score, 313 + dids: map[string]struct{}{indexed.DID: {}}, 308 314 } 309 315 } 310 316 } ··· 334 340 } 335 341 336 342 return results, nil 343 + } 344 + 345 + // scoreRecord computes a composite score for choosing the best representative 346 + // record within a dedup group. Higher score wins. Factors: 347 + // - Field completeness (1 point per non-empty field) 348 + // - Reference count (2 points per record that references this URI via sourceRef) 349 + // - Preferred DID bonus (10 points if the record's author is in PreferredDIDs) 350 + func scoreRecord(ctx context.Context, source RecordSource, uri, did string, fields map[string]string) int { 351 + score := countNonEmpty(fields) 352 + 353 + if refCount, err := source.CountReferencesToURI(ctx, uri); err == nil { 354 + score += refCount * 2 355 + } 356 + 357 + if _, ok := PreferredDIDs[did]; ok { 358 + score += 10 359 + } 360 + 361 + return score 337 362 } 338 363 339 364 func countNonEmpty(fields map[string]string) int {
+97
tests/integration/suggestions_test.go
··· 8 8 "testing" 9 9 10 10 "arabica/internal/models" 11 + "arabica/internal/suggestions" 11 12 12 13 "github.com/stretchr/testify/assert" 13 14 "github.com/stretchr/testify/require" ··· 59 60 var results []suggestionResult 60 61 require.NoError(t, json.Unmarshal([]byte(body), &results)) 61 62 return results 63 + } 64 + 65 + // roasterURI builds the AT-URI for a roaster owned by the given DID. 66 + func roasterURI(did, rkey string) string { 67 + return "at://" + did + "/social.arabica.alpha.roaster/" + rkey 68 + } 69 + 70 + // TestHTTP_SuggestionScoring_PrefersReferenced verifies the sourceRef selection 71 + // logic end-to-end: when a roaster has been "adopted" by other users (their 72 + // records carry source_ref pointing back at the original), that original URI 73 + // should win as the canonical sourceRef in the suggestions response. 74 + // 75 + // This is the original motivating scenario for the scoring work — exercises 76 + // witness cache writes, the json_extract reference query, and the composite 77 + // score function all together. 78 + func TestHTTP_SuggestionScoring_PrefersReferenced(t *testing.T) { 79 + h := StartHarness(t, nil) 80 + 81 + // Four users total: alice/bob/carol contribute, dave queries. 82 + bob := h.CreateAccount("bob@test.com", "bob.test", "hunter2") 83 + carol := h.CreateAccount("carol@test.com", "carol.test", "hunter2") 84 + dave := h.CreateAccount("dave@test.com", "dave.test", "hunter2") 85 + 86 + aliceClient := h.Client // primary == alice 87 + bobClient := h.NewClientForAccount(bob) 88 + carolClient := h.NewClientForAccount(carol) 89 + daveClient := h.NewClientForAccount(dave) 90 + alice := h.PrimaryAccount 91 + 92 + // Alice creates the canonical "Counter Culture" roaster (no sourceRef). 93 + aliceRoaster := postRoasterAs(t, h, aliceClient, "Counter Culture", "Durham, NC", "") 94 + aliceURI := roasterURI(alice.DID, aliceRoaster.RKey) 95 + 96 + // Bob and Carol both adopt Alice's roaster — i.e. they create their own 97 + // records with source_ref pointing at her URI. Each adoption gives Alice 98 + // +2 points in the score function (refCount * 2). 99 + postRoasterAs(t, h, bobClient, "Counter Culture Coffee", "Durham, NC", aliceURI) 100 + postRoasterAs(t, h, carolClient, "Counter Culture", "Durham, NC", aliceURI) 101 + 102 + // Dave queries suggestions. The three contributing roasters dedupe into 103 + // one candidate, and Alice's URI should win because two records reference it. 104 + results := fetchSuggestions(t, h, daveClient, "roasters", "counter") 105 + require.NotEmpty(t, results, "expected at least one suggestion") 106 + 107 + var cc *suggestionResult 108 + for i := range results { 109 + if strings.Contains(strings.ToLower(results[i].Name), "counter culture") { 110 + cc = &results[i] 111 + break 112 + } 113 + } 114 + require.NotNil(t, cc, "expected a Counter Culture suggestion in results") 115 + 116 + assert.Equal(t, 3, cc.Count, "all three contributing users should be counted") 117 + assert.Equal(t, aliceURI, cc.SourceURI, 118 + "alice's roaster (referenced by 2 others) should win as the canonical sourceRef") 119 + } 120 + 121 + // TestHTTP_SuggestionScoring_PreferredDIDOverride verifies that a DID added to 122 + // suggestions.PreferredDIDs wins over a record with more references. 123 + func TestHTTP_SuggestionScoring_PreferredDIDOverride(t *testing.T) { 124 + h := StartHarness(t, nil) 125 + 126 + bob := h.CreateAccount("bob@test.com", "bob.test", "hunter2") 127 + carol := h.CreateAccount("carol@test.com", "carol.test", "hunter2") 128 + dave := h.CreateAccount("dave@test.com", "dave.test", "hunter2") 129 + 130 + aliceClient := h.Client 131 + bobClient := h.NewClientForAccount(bob) 132 + carolClient := h.NewClientForAccount(carol) 133 + daveClient := h.NewClientForAccount(dave) 134 + alice := h.PrimaryAccount 135 + 136 + // Alice creates the canonical roaster, Bob and Carol adopt it. 137 + aliceRoaster := postRoasterAs(t, h, aliceClient, "Counter Culture", "Durham, NC", "") 138 + aliceURI := roasterURI(alice.DID, aliceRoaster.RKey) 139 + bobRoaster := postRoasterAs(t, h, bobClient, "Counter Culture Coffee", "Durham, NC", aliceURI) 140 + postRoasterAs(t, h, carolClient, "Counter Culture", "Durham, NC", aliceURI) 141 + 142 + // Mark Bob as preferred. The +10 bonus should overcome Alice's +4 from refs. 143 + suggestions.PreferredDIDs[bob.DID] = struct{}{} 144 + t.Cleanup(func() { delete(suggestions.PreferredDIDs, bob.DID) }) 145 + 146 + results := fetchSuggestions(t, h, daveClient, "roasters", "counter") 147 + var cc *suggestionResult 148 + for i := range results { 149 + if strings.Contains(strings.ToLower(results[i].Name), "counter culture") { 150 + cc = &results[i] 151 + break 152 + } 153 + } 154 + require.NotNil(t, cc) 155 + 156 + bobURI := roasterURI(bob.DID, bobRoaster.RKey) 157 + assert.Equal(t, bobURI, cc.SourceURI, 158 + "bob's roaster should win because his DID is in PreferredDIDs") 62 159 } 63 160 64 161 // TestHTTP_SuggestionDedupe verifies that when multiple users post a roaster