Coffee journaling on ATProto (alpha) alpha.arabica.social
coffee
17
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: improved moderation content filter

authored by

Patrick Dewey and committed by tangled.org cd30309f 7e0eaa9c

+297 -28
+8 -28
internal/feed/service.go
··· 10 10 "arabica/internal/lexicons" 11 11 "arabica/internal/metrics" 12 12 "arabica/internal/models" 13 + "arabica/internal/moderation" 13 14 14 15 "github.com/rs/zerolog/log" 15 16 ) ··· 175 176 return items 176 177 } 177 178 178 - // Load moderation sets in bulk (2 queries instead of 2*len(items)) 179 - blacklistedDIDs := make(map[string]bool) 180 - if dids, err := s.moderationFilter.ListBlacklistedDIDs(ctx); err == nil { 181 - for _, did := range dids { 182 - blacklistedDIDs[did] = true 183 - } 179 + f, err := moderation.LoadFilter(ctx, s.moderationFilter) 180 + if err != nil { 181 + log.Warn().Err(err).Msg("feed: failed to load moderation filter") 182 + return items 184 183 } 185 184 186 - hiddenURIs := make(map[string]bool) 187 - if uris, err := s.moderationFilter.ListHiddenURIs(ctx); err == nil { 188 - for _, uri := range uris { 189 - hiddenURIs[uri] = true 190 - } 191 - } 192 - 193 - filtered := make([]*FeedItem, 0, len(items)) 194 - for _, item := range items { 195 - authorDID := s.getAuthorDID(item) 196 - if authorDID != "" && blacklistedDIDs[authorDID] { 197 - log.Debug().Str("author", authorDID).Msg("feed: filtering blacklisted user's content") 198 - continue 199 - } 200 - 201 - if item.SubjectURI != "" && hiddenURIs[item.SubjectURI] { 202 - log.Debug().Str("uri", item.SubjectURI).Msg("feed: filtering hidden record") 203 - continue 204 - } 205 - 206 - filtered = append(filtered, item) 207 - } 185 + filtered := moderation.FilterSlice(f, items, func(item *FeedItem) (string, string) { 186 + return item.SubjectURI, s.getAuthorDID(item) 187 + }) 208 188 209 189 if len(items) != len(filtered) { 210 190 log.Debug().
+14
internal/handlers/handlers.go
··· 128 128 } 129 129 } 130 130 131 + // loadContentFilter creates a ContentFilter from the moderation store. 132 + // Returns nil if moderation is not configured. 133 + func (h *Handler) loadContentFilter(ctx context.Context) *moderation.ContentFilter { 134 + if h.moderationStore == nil { 135 + return nil 136 + } 137 + f, err := moderation.LoadFilter(ctx, h.moderationStore) 138 + if err != nil { 139 + log.Warn().Err(err).Msg("failed to load content filter") 140 + return nil 141 + } 142 + return f 143 + } 144 + 131 145 // validateRKey validates and returns an rkey from a path parameter. 132 146 // Returns the rkey if valid, or writes an error response and returns empty string if invalid. 133 147 func validateRKey(w http.ResponseWriter, rkey string) string {
+25
internal/handlers/profile.go
··· 9 9 "arabica/internal/atproto" 10 10 "arabica/internal/metrics" 11 11 "arabica/internal/models" 12 + "arabica/internal/moderation" 12 13 "arabica/internal/web/bff" 13 14 "arabica/internal/web/components" 14 15 "arabica/internal/web/pages" ··· 420 421 // For now, continue with the DID we have 421 422 } 422 423 424 + // Check if user is blacklisted 425 + if cf := h.loadContentFilter(ctx); cf != nil && cf.IsBlocked(did) { 426 + layoutData, _, _ := h.layoutDataFromRequest(r, "Profile Not Found") 427 + w.WriteHeader(http.StatusNotFound) 428 + if err := pages.ProfileNotFound(layoutData).Render(r.Context(), w); err != nil { 429 + log.Error().Err(err).Msg("Failed to render profile not found page") 430 + } 431 + return 432 + } 433 + 423 434 // Fetch profile 424 435 profile, err := publicClient.GetProfile(ctx, did) 425 436 if err != nil { ··· 532 543 } 533 544 } 534 545 546 + // Check if user is blacklisted 547 + cf := h.loadContentFilter(ctx) 548 + if cf != nil && cf.IsBlocked(did) { 549 + http.Error(w, "User not found", http.StatusNotFound) 550 + return 551 + } 552 + 535 553 // Fetch all user data from their PDS 536 554 profileData, err := h.fetchUserProfileData(ctx, did, publicClient) 537 555 if err != nil { 538 556 log.Error().Err(err).Str("did", did).Msg("Failed to fetch user data for profile partial") 539 557 http.Error(w, "Failed to load profile data", http.StatusInternalServerError) 540 558 return 559 + } 560 + 561 + // Filter moderated content from profile 562 + if cf != nil { 563 + profileData.Brews = moderation.FilterSlice(cf, profileData.Brews, func(b *models.Brew) (string, string) { 564 + return atproto.BuildATURI(did, atproto.NSIDBrew, b.RKey), did 565 + }) 541 566 } 542 567 543 568 // Check if this is an Arabica user (has records or is registered in feed)
+11
internal/handlers/recipe.go
··· 11 11 "arabica/internal/atproto" 12 12 "arabica/internal/matching" 13 13 "arabica/internal/models" 14 + "arabica/internal/moderation" 14 15 "arabica/internal/web/components" 15 16 "arabica/internal/web/pages" 16 17 ··· 637 638 638 639 recipe.Interpolate() 639 640 recipes = append(recipes, recipe) 641 + } 642 + 643 + // Filter moderated content (hidden records + blacklisted users) 644 + if cf := h.loadContentFilter(ctx); cf != nil { 645 + recipes = moderation.FilterSlice(cf, recipes, func(r *models.Recipe) (string, string) { 646 + if r.AuthorDID != "" && r.RKey != "" { 647 + return atproto.BuildATURI(r.AuthorDID, atproto.NSIDRecipe, r.RKey), r.AuthorDID 648 + } 649 + return "", r.AuthorDID 650 + }) 640 651 } 641 652 642 653 return recipes, nil
+89
internal/moderation/filter.go
··· 1 + package moderation 2 + 3 + import ( 4 + "context" 5 + 6 + "github.com/rs/zerolog/log" 7 + ) 8 + 9 + // FilterSource provides the data needed to build a ContentFilter. 10 + // Both moderation.Store and feed.ModerationFilter satisfy this interface. 11 + type FilterSource interface { 12 + ListHiddenURIs(ctx context.Context) ([]string, error) 13 + ListBlacklistedDIDs(ctx context.Context) ([]string, error) 14 + } 15 + 16 + // ContentFilter holds pre-loaded moderation state for efficient per-item checks. 17 + // Create one per request via LoadFilter, then use ShouldHide or FilterSlice. 18 + type ContentFilter struct { 19 + hiddenURIs map[string]bool 20 + blacklisted map[string]bool 21 + } 22 + 23 + // LoadFilter bulk-loads hidden URIs and blacklisted DIDs from the source (2 queries). 24 + // Errors from the source are logged and degraded gracefully (partial filtering). 25 + // A nil source returns an empty filter that hides nothing. 26 + func LoadFilter(ctx context.Context, src FilterSource) (*ContentFilter, error) { 27 + f := &ContentFilter{ 28 + hiddenURIs: make(map[string]bool), 29 + blacklisted: make(map[string]bool), 30 + } 31 + 32 + if src == nil { 33 + return f, nil 34 + } 35 + 36 + if uris, err := src.ListHiddenURIs(ctx); err != nil { 37 + log.Warn().Err(err).Msg("moderation: failed to load hidden URIs for filter") 38 + } else { 39 + for _, uri := range uris { 40 + f.hiddenURIs[uri] = true 41 + } 42 + } 43 + 44 + if dids, err := src.ListBlacklistedDIDs(ctx); err != nil { 45 + log.Warn().Err(err).Msg("moderation: failed to load blacklisted DIDs for filter") 46 + } else { 47 + for _, did := range dids { 48 + f.blacklisted[did] = true 49 + } 50 + } 51 + 52 + return f, nil 53 + } 54 + 55 + // ShouldHide returns true if the record should be hidden, either because its 56 + // URI is in the hidden set or its author DID is blacklisted. 57 + // Empty strings are never matched. 58 + func (f *ContentFilter) ShouldHide(uri, authorDID string) bool { 59 + if uri != "" && f.hiddenURIs[uri] { 60 + return true 61 + } 62 + if authorDID != "" && f.blacklisted[authorDID] { 63 + return true 64 + } 65 + return false 66 + } 67 + 68 + // IsBlocked returns true if the given DID is blacklisted. 69 + func (f *ContentFilter) IsBlocked(did string) bool { 70 + return did != "" && f.blacklisted[did] 71 + } 72 + 73 + // FilterSlice removes items that should be hidden from a slice. 74 + // The getKeys function extracts the AT-URI and author DID from each item. 75 + // A nil filter returns the input unchanged. 76 + func FilterSlice[T any](f *ContentFilter, items []T, getKeys func(T) (uri string, authorDID string)) []T { 77 + if f == nil { 78 + return items 79 + } 80 + 81 + result := make([]T, 0, len(items)) 82 + for _, item := range items { 83 + uri, did := getKeys(item) 84 + if !f.ShouldHide(uri, did) { 85 + result = append(result, item) 86 + } 87 + } 88 + return result 89 + }
+150
internal/moderation/filter_test.go
··· 1 + package moderation 2 + 3 + import ( 4 + "context" 5 + "testing" 6 + 7 + "github.com/stretchr/testify/assert" 8 + "github.com/stretchr/testify/require" 9 + ) 10 + 11 + // mockFilterSource implements FilterSource for testing 12 + type mockFilterSource struct { 13 + hiddenURIs []string 14 + blacklistedDIDs []string 15 + hiddenErr error 16 + blacklistErr error 17 + } 18 + 19 + func (m *mockFilterSource) ListHiddenURIs(ctx context.Context) ([]string, error) { 20 + return m.hiddenURIs, m.hiddenErr 21 + } 22 + 23 + func (m *mockFilterSource) ListBlacklistedDIDs(ctx context.Context) ([]string, error) { 24 + return m.blacklistedDIDs, m.blacklistErr 25 + } 26 + 27 + func TestLoadFilter(t *testing.T) { 28 + ctx := context.Background() 29 + src := &mockFilterSource{ 30 + hiddenURIs: []string{"at://did:plc:a/col/1", "at://did:plc:b/col/2"}, 31 + blacklistedDIDs: []string{"did:plc:bad"}, 32 + } 33 + 34 + f, err := LoadFilter(ctx, src) 35 + require.NoError(t, err) 36 + assert.NotNil(t, f) 37 + } 38 + 39 + func TestLoadFilter_NilSource(t *testing.T) { 40 + f, err := LoadFilter(context.Background(), nil) 41 + require.NoError(t, err) 42 + assert.NotNil(t, f) 43 + // Empty filter should hide nothing 44 + assert.False(t, f.ShouldHide("at://anything", "did:plc:anyone")) 45 + } 46 + 47 + func TestShouldHide_HiddenURI(t *testing.T) { 48 + ctx := context.Background() 49 + f, _ := LoadFilter(ctx, &mockFilterSource{ 50 + hiddenURIs: []string{"at://did:plc:a/col/1"}, 51 + }) 52 + 53 + assert.True(t, f.ShouldHide("at://did:plc:a/col/1", "")) 54 + assert.False(t, f.ShouldHide("at://did:plc:a/col/2", "")) 55 + } 56 + 57 + func TestShouldHide_BlacklistedAuthor(t *testing.T) { 58 + ctx := context.Background() 59 + f, _ := LoadFilter(ctx, &mockFilterSource{ 60 + blacklistedDIDs: []string{"did:plc:bad"}, 61 + }) 62 + 63 + assert.True(t, f.ShouldHide("", "did:plc:bad")) 64 + assert.False(t, f.ShouldHide("", "did:plc:good")) 65 + } 66 + 67 + func TestShouldHide_BothEmpty(t *testing.T) { 68 + ctx := context.Background() 69 + f, _ := LoadFilter(ctx, &mockFilterSource{}) 70 + 71 + assert.False(t, f.ShouldHide("at://anything", "did:plc:anyone")) 72 + } 73 + 74 + func TestIsBlocked(t *testing.T) { 75 + ctx := context.Background() 76 + f, _ := LoadFilter(ctx, &mockFilterSource{ 77 + blacklistedDIDs: []string{"did:plc:bad"}, 78 + }) 79 + 80 + assert.True(t, f.IsBlocked("did:plc:bad")) 81 + assert.False(t, f.IsBlocked("did:plc:good")) 82 + } 83 + 84 + func TestFilterSlice(t *testing.T) { 85 + type item struct { 86 + uri string 87 + authorDID string 88 + name string 89 + } 90 + 91 + ctx := context.Background() 92 + f, _ := LoadFilter(ctx, &mockFilterSource{ 93 + hiddenURIs: []string{"at://did:plc:a/col/hidden"}, 94 + blacklistedDIDs: []string{"did:plc:bad"}, 95 + }) 96 + 97 + items := []*item{ 98 + {uri: "at://did:plc:a/col/ok", authorDID: "did:plc:good", name: "visible"}, 99 + {uri: "at://did:plc:a/col/hidden", authorDID: "did:plc:good", name: "hidden-record"}, 100 + {uri: "at://did:plc:b/col/ok", authorDID: "did:plc:bad", name: "blocked-author"}, 101 + {uri: "at://did:plc:c/col/ok", authorDID: "did:plc:nice", name: "also-visible"}, 102 + } 103 + 104 + result := FilterSlice(f, items, func(i *item) (string, string) { 105 + return i.uri, i.authorDID 106 + }) 107 + 108 + assert.Len(t, result, 2) 109 + assert.Equal(t, "visible", result[0].name) 110 + assert.Equal(t, "also-visible", result[1].name) 111 + } 112 + 113 + func TestFilterSlice_NilFilter(t *testing.T) { 114 + type item struct{ name string } 115 + items := []*item{{name: "a"}, {name: "b"}} 116 + 117 + result := FilterSlice[*item](nil, items, func(i *item) (string, string) { 118 + return "", "" 119 + }) 120 + 121 + assert.Len(t, result, 2) 122 + } 123 + 124 + func TestLoadFilter_SourceErrors(t *testing.T) { 125 + ctx := context.Background() 126 + 127 + t.Run("hidden URIs error returns partial filter", func(t *testing.T) { 128 + f, err := LoadFilter(ctx, &mockFilterSource{ 129 + hiddenErr: assert.AnError, 130 + blacklistedDIDs: []string{"did:plc:bad"}, 131 + }) 132 + require.NoError(t, err) 133 + // Blacklist still works 134 + assert.True(t, f.IsBlocked("did:plc:bad")) 135 + // Hidden URIs degraded gracefully 136 + assert.False(t, f.ShouldHide("at://anything", "")) 137 + }) 138 + 139 + t.Run("blacklist error returns partial filter", func(t *testing.T) { 140 + f, err := LoadFilter(ctx, &mockFilterSource{ 141 + hiddenURIs: []string{"at://did:plc:a/col/1"}, 142 + blacklistErr: assert.AnError, 143 + }) 144 + require.NoError(t, err) 145 + // Hidden URIs still work 146 + assert.True(t, f.ShouldHide("at://did:plc:a/col/1", "")) 147 + // Blacklist degraded gracefully 148 + assert.False(t, f.IsBlocked("did:plc:bad")) 149 + }) 150 + }