···11+package helpers
22+33+import (
44+ "time"
55+66+ "github.com/bluesky-social/indigo/automod"
77+)
// atprotoAccountEpoch is the lower bound for believable account creation
// times: no accounts exist before this time.
var atprotoAccountEpoch = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)

// plausibleAccountCreation reports whether an account creation timestamp is
// plausible: not nil, strictly after atprotoAccountEpoch, and not more than
// one hour in the future.
func plausibleAccountCreation(when *time.Time) bool {
	// slack allowed before a timestamp is treated as being "in the future"
	const maxFutureSkew = time.Hour

	if when == nil {
		return false
	}
	// this is mostly to check for misconfigurations or null values (eg, UNIX
	// epoch zero means "unknown", not actually 1970)
	if !when.After(atprotoAccountEpoch) {
		return false
	}
	// a timestamp in the future would also indicate some misconfiguration
	return !when.After(time.Now().Add(maxFutureSkew))
}
2727+2828+// checks if account was created recently, based on either public or private account metadata. if metadata isn't available at all, or seems bogus, returns 'false'
2929+func AccountIsYoungerThan(c *automod.AccountContext, age time.Duration) bool {
3030+ // TODO: consider swapping priority order here (and below)
3131+ if c.Account.CreatedAt != nil && plausibleAccountCreation(c.Account.CreatedAt) {
3232+ return time.Since(*c.Account.CreatedAt) < age
3333+ }
3434+ if c.Account.Private != nil && plausibleAccountCreation(c.Account.Private.IndexedAt) {
3535+ return time.Since(*c.Account.Private.IndexedAt) < age
3636+ }
3737+ return false
3838+}
3939+4040+// checks if account was *not* created recently, based on either public or private account metadata. if metadata isn't available at all, or seems bogus, returns 'false'
4141+func AccountIsOlderThan(c *automod.AccountContext, age time.Duration) bool {
4242+ if c.Account.CreatedAt != nil && plausibleAccountCreation(c.Account.CreatedAt) {
4343+ return time.Since(*c.Account.CreatedAt) >= age
4444+ }
4545+ if c.Account.Private != nil && plausibleAccountCreation(c.Account.Private.IndexedAt) {
4646+ return time.Since(*c.Account.Private.IndexedAt) >= age
4747+ }
4848+ return false
4949+}
···11+package helpers
22+33+import (
44+ "fmt"
55+ "regexp"
66+77+ "github.com/spaolacci/murmur3"
88+)
// DedupeStrings returns the distinct values of in, keeping the order in which
// each value first appears. The result is nil when in has no elements.
func DedupeStrings(in []string) []string {
	var out []string
	// zero-width set values; sized up front to avoid rehashing while filling
	seen := make(map[string]struct{}, len(in))
	for _, v := range in {
		if _, dup := seen[v]; dup {
			continue
		}
		seen[v] = struct{}{}
		out = append(out, v)
	}
	return out
}
2121+2222+// returns a fast, compact hash of a string
2323+//
2424+// current implementation uses murmur3, default seed, and hex encoding
2525+func HashOfString(s string) string {
2626+ val := murmur3.Sum64([]byte(s))
2727+ return fmt.Sprintf("%016x", val)
2828+}
// urlRegex matches URL-like substrings: an optional http, https, or ftp
// scheme followed by dot-separated text.
//
// based on: https://stackoverflow.com/a/48769624, with no trailing period allowed
var urlRegex = regexp.MustCompile(`(?:(?:https?|ftp):\/\/)?[\w/\-?=%.]+\.[\w/\-&?=%.]*[\w/\-&?=%]+`)

// ExtractTextURLs returns every non-overlapping substring of raw matched by
// urlRegex, in order of appearance. It returns nil when nothing matches.
func ExtractTextURLs(raw string) []string {
	return urlRegex.FindAllString(raw, -1)
}
+64
automod/helpers/text_test.go
···11+package helpers
22+33+import (
44+ "testing"
55+66+ "github.com/bluesky-social/indigo/automod/keyword"
77+88+ "github.com/stretchr/testify/assert"
99+)
1010+1111+func TestTokenizeText(t *testing.T) {
1212+ assert := assert.New(t)
1313+1414+ fixtures := []struct {
1515+ s string
1616+ out []string
1717+ }{
1818+ {
1919+ s: "1 'Two' three!",
2020+ out: []string{"1", "two", "three"},
2121+ },
2222+ {
2323+ s: " foo1;bar2,baz3...",
2424+ out: []string{"foo1", "bar2", "baz3"},
2525+ },
2626+ {
2727+ s: "https://example.com/index.html",
2828+ out: []string{"https", "example", "com", "index", "html"},
2929+ },
3030+ }
3131+3232+ for _, fix := range fixtures {
3333+ assert.Equal(fix.out, keyword.TokenizeText(fix.s))
3434+ }
3535+}
3636+3737+func TestExtractURL(t *testing.T) {
3838+ assert := assert.New(t)
3939+4040+ fixtures := []struct {
4141+ s string
4242+ out []string
4343+ }{
4444+ {
4545+ s: "this is a description with example.com mentioned in the middle",
4646+ out: []string{"example.com"},
4747+ },
4848+ {
4949+ s: "this is another example with https://en.wikipedia.org/index.html: and archive.org, and https://eff.org/... and bsky.app.",
5050+ out: []string{"https://en.wikipedia.org/index.html", "archive.org", "https://eff.org/", "bsky.app"},
5151+ },
5252+ }
5353+5454+ for _, fix := range fixtures {
5555+ assert.Equal(fix.out, ExtractTextURLs(fix.s))
5656+ }
5757+}
5858+5959+func TestHashOfString(t *testing.T) {
6060+ assert := assert.New(t)
6161+6262+ // hashing function should be consistent over time
6363+ assert.Equal("4e6f69c0e3d10992", HashOfString("dummy-value"))
6464+}
+6-5
automod/rules/harassment.go
···88 "github.com/bluesky-social/indigo/atproto/syntax"
99 "github.com/bluesky-social/indigo/automod"
1010 "github.com/bluesky-social/indigo/automod/countstore"
1111+ "github.com/bluesky-social/indigo/automod/helpers"
1112)
12131314var _ automod.PostRuleFunc = HarassmentTargetInteractionPostRule
14151516// looks for new accounts, which interact with frequently-harassed accounts, and report them for review
1617func HarassmentTargetInteractionPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error {
1717- if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 24*time.Hour) {
1818+ if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 24*time.Hour) {
1819 return nil
1920 }
20212122 var interactionDIDs []string
2222- facets, err := ExtractFacets(post)
2323+ facets, err := helpers.ExtractFacets(post)
2324 if err != nil {
2425 return err
2526 }
···2829 interactionDIDs = append(interactionDIDs, *pf.DID)
2930 }
3031 }
3131- if post.Reply != nil && !IsSelfThread(c, post) {
3232+ if post.Reply != nil && !helpers.IsSelfThread(c, post) {
3233 parentURI, err := syntax.ParseATURI(post.Reply.Parent.Uri)
3334 if err != nil {
3435 return err
···5758 return nil
5859 }
59606060- interactionDIDs = dedupeStrings(interactionDIDs)
6161+ interactionDIDs = helpers.DedupeStrings(interactionDIDs)
6162 for _, d := range interactionDIDs {
6263 did, err := syntax.ParseDID(d)
6364 if err != nil {
···114115115116// looks for new accounts, which frequently post the same type of content
116117func HarassmentTrivialPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error {
117117- if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 7*24*time.Hour) {
118118+ if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 7*24*time.Hour) {
118119 return nil
119120 }
120121
+4-3
automod/rules/hashtags.go
···5566 appbsky "github.com/bluesky-social/indigo/api/bsky"
77 "github.com/bluesky-social/indigo/automod"
88+ "github.com/bluesky-social/indigo/automod/helpers"
89 "github.com/bluesky-social/indigo/automod/keyword"
910)
10111112// looks for specific hashtags from known lists
1213func BadHashtagsPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error {
1313- for _, tag := range ExtractHashtagsPost(post) {
1414- tag = NormalizeHashtag(tag)
1414+ for _, tag := range helpers.ExtractHashtagsPost(post) {
1515+ tag = helpers.NormalizeHashtag(tag)
1516 // skip some bad-word hashtags which frequently false-positive
1617 if tag == "nazi" || tag == "hitler" {
1718 continue
···35363637// if a post is "almost all" hashtags, it might be a form of search spam
3738func TooManyHashtagsPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error {
3838- tags := ExtractHashtagsPost(post)
3939+ tags := helpers.ExtractHashtagsPost(post)
3940 tagChars := 0
4041 for _, tag := range tags {
4142 tagChars += len(tag)
+4-77
automod/rules/helpers.go
automod/helpers/bsky.go
···11-package rules
11+package helpers
2233import (
44 "fmt"
55- "regexp"
66- "time"
7586 appbsky "github.com/bluesky-social/indigo/api/bsky"
97 "github.com/bluesky-social/indigo/atproto/syntax"
108 "github.com/bluesky-social/indigo/automod"
119 "github.com/bluesky-social/indigo/automod/keyword"
1212-1313- "github.com/spaolacci/murmur3"
1410)
15111616-func dedupeStrings(in []string) []string {
1717- var out []string
1818- seen := make(map[string]bool)
1919- for _, v := range in {
2020- if !seen[v] {
2121- out = append(out, v)
2222- seen[v] = true
2323- }
2424- }
2525- return out
2626-}
2727-2812func ExtractHashtagsPost(post *appbsky.FeedPost) []string {
2913 var tags []string
3014 for _, tag := range post.Tags {
···3721 }
3822 }
3923 }
4040- return dedupeStrings(tags)
2424+ return DedupeStrings(tags)
4125}
42264327func NormalizeHashtag(raw string) string {
···10387 }
10488 }
10589 }
106106- return dedupeStrings(out)
9090+ return DedupeStrings(out)
10791}
1089210993func ExtractBlobCIDsProfile(profile *appbsky.ActorProfile) []string {
···11498 if profile.Banner != nil {
11599 out = append(out, profile.Banner.Ref.String())
116100 }
117117- return dedupeStrings(out)
101101+ return DedupeStrings(out)
118102}
119103120104func ExtractTextTokensPost(post *appbsky.FeedPost) []string {
···150134 s += " " + *profile.DisplayName
151135 }
152136 return keyword.TokenizeText(s)
153153-}
154154-155155-// based on: https://stackoverflow.com/a/48769624, with no trailing period allowed
156156-var urlRegex = regexp.MustCompile(`(?:(?:https?|ftp):\/\/)?[\w/\-?=%.]+\.[\w/\-&?=%.]*[\w/\-&?=%]+`)
157157-158158-func ExtractTextURLs(raw string) []string {
159159- return urlRegex.FindAllString(raw, -1)
160137}
161138162139func ExtractTextURLsProfile(profile *appbsky.ActorProfile) []string {
···191168 return false
192169}
193170194194-// returns a fast, compact hash of a string
195195-//
196196-// current implementation uses murmur3, default seed, and hex encoding
197197-func HashOfString(s string) string {
198198- val := murmur3.Sum64([]byte(s))
199199- return fmt.Sprintf("%016x", val)
200200-}
201201-202171func ParentOrRootIsFollower(c *automod.RecordContext, post *appbsky.FeedPost) bool {
203172 if post.Reply == nil || IsSelfThread(c, post) {
204173 return false
···238207 rel = c.GetAccountRelationship(rootDID)
239208 if rel.FollowedBy {
240209 return true
241241- }
242242- return false
243243-}
244244-245245-// no accounts exist before this time
246246-var atprotoAccountEpoch = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)
247247-248248-// returns true if account creation timestamp is plausible: not-nil, not in distant past, not in the future
249249-func plausibleAccountCreation(when *time.Time) bool {
250250- if when == nil {
251251- return false
252252- }
253253- // this is mostly to check for misconfigurations or null values (eg, UNIX epoch zero means "unknown" not actually 1970)
254254- if !when.After(atprotoAccountEpoch) {
255255- return false
256256- }
257257- // a timestamp in the future would also indicate some misconfiguration
258258- if when.After(time.Now().Add(time.Hour)) {
259259- return false
260260- }
261261- return true
262262-}
263263-264264-// checks if account was created recently, based on either public or private account metadata. if metadata isn't available at all, or seems bogus, returns 'false'
265265-func AccountIsYoungerThan(c *automod.AccountContext, age time.Duration) bool {
266266- // TODO: consider swapping priority order here (and below)
267267- if c.Account.CreatedAt != nil && plausibleAccountCreation(c.Account.CreatedAt) {
268268- return time.Since(*c.Account.CreatedAt) < age
269269- }
270270- if c.Account.Private != nil && plausibleAccountCreation(c.Account.Private.IndexedAt) {
271271- return time.Since(*c.Account.Private.IndexedAt) < age
272272- }
273273- return false
274274-}
275275-276276-// checks if account was *not* created recently, based on either public or private account metadata. if metadata isn't available at all, or seems bogus, returns 'false'
277277-func AccountIsOlderThan(c *automod.AccountContext, age time.Duration) bool {
278278- if c.Account.CreatedAt != nil && plausibleAccountCreation(c.Account.CreatedAt) {
279279- return time.Since(*c.Account.CreatedAt) >= age
280280- }
281281- if c.Account.Private != nil && plausibleAccountCreation(c.Account.Private.IndexedAt) {
282282- return time.Since(*c.Account.Private.IndexedAt) >= age
283210 }
284211 return false
285212}
···7788 "github.com/bluesky-social/indigo/automod"
99 "github.com/bluesky-social/indigo/automod/countstore"
1010+ "github.com/bluesky-social/indigo/automod/helpers"
1011)
11121213// triggers on first identity event for an account (DID)
1314func NewAccountRule(c *automod.AccountContext) error {
1414- if c.Account.Identity == nil || !AccountIsYoungerThan(c, 4*time.Hour) {
1515+ if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(c, 4*time.Hour) {
1516 return nil
1617 }
1718
+5-4
automod/rules/keyword.go
···7788 appbsky "github.com/bluesky-social/indigo/api/bsky"
99 "github.com/bluesky-social/indigo/automod"
1010+ "github.com/bluesky-social/indigo/automod/helpers"
1011 "github.com/bluesky-social/indigo/automod/keyword"
1112)
1213···1718 isJapanese = true
1819 }
1920 }
2020- for _, tok := range ExtractTextTokensPost(post) {
2121+ for _, tok := range helpers.ExtractTextTokensPost(post) {
2122 word := keyword.SlugIsExplicitSlur(tok)
2223 // used very frequently in a reclaimed context
2324 if word != "" && word != "faggot" && word != "tranny" && word != "coon" && !(word == "kike" && isJapanese) {
···5455 //c.Notify("slack")
5556 }
5657 }
5757- for _, tok := range ExtractTextTokensProfile(profile) {
5858+ for _, tok := range helpers.ExtractTextTokensProfile(profile) {
5859 // de-pluralize
5960 tok = strings.TrimSuffix(tok, "s")
6061 if c.InSet("worst-words", tok) {
···71727273// looks for the specific harassment situation of a replay to another user with only a single word
7374func ReplySingleBadWordPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error {
7474- if post.Reply != nil && !IsSelfThread(c, post) {
7575- tokens := ExtractTextTokensPost(post)
7575+ if post.Reply != nil && !helpers.IsSelfThread(c, post) {
7676+ tokens := helpers.ExtractTextTokensPost(post)
7677 if len(tokens) != 1 {
7778 return nil
7879 }