this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

automod visual auto-labeling, and abusive image scanning (#520)

This is just porting over existing auto-labeling stuff from appview in
to the automod framework.

New engine-level supporting features:

- new "blob" rule function type
- ruleset fetches blobs from upstream PDS, if necessary
- `Effects` can track blob-level takedowns (CDN purges) as part of
record takedowns

authored by

bnewbold and committed by
GitHub
8aaa0545 c029c5cf

+1163 -9
+115
automod/engine/blobs.go
··· 1 + package engine 2 + 3 + import ( 4 + "fmt" 5 + "io" 6 + "net/http" 7 + 8 + appbsky "github.com/bluesky-social/indigo/api/bsky" 9 + lexutil "github.com/bluesky-social/indigo/lex/util" 10 + ) 11 + 12 + // Parses out any blobs from the enclosed record. 13 + // 14 + // TODO: currently this function uses schema-specific logic, and won't work with generic lexicon records. A future version could use the indigo/atproto/data package and the raw record CBOR to extract blobs from arbitrary records 15 + // 16 + // NOTE: for consistency with other RecordContext methods, which don't usually return errors, maybe the error-returning version of this function should be a helper function, or definted on RecordOp, and the RecordContext version should return an empty array on error? 17 + func (c *RecordContext) Blobs() ([]lexutil.LexBlob, error) { 18 + 19 + if c.RecordOp.Action != CreateOp { 20 + // TODO: should this really error, or return empty array? 21 + return nil, fmt.Errorf("expected record creation, got: %s", c.RecordOp.Action) 22 + } 23 + 24 + var blobs []lexutil.LexBlob 25 + 26 + switch c.RecordOp.Collection.String() { 27 + case "app.bsky.feed.post": 28 + post, ok := c.RecordOp.Value.(*appbsky.FeedPost) 29 + if !ok { 30 + return nil, fmt.Errorf("mismatch between collection (%s) and type", c.RecordOp.Collection) 31 + } 32 + if post.Embed != nil && post.Embed.EmbedImages != nil { 33 + for _, eii := range post.Embed.EmbedImages.Images { 34 + if eii.Image != nil { 35 + blobs = append(blobs, *eii.Image) 36 + } 37 + } 38 + } 39 + if post.Embed != nil && post.Embed.EmbedExternal != nil { 40 + ext := post.Embed.EmbedExternal.External 41 + if ext != nil && ext.Thumb != nil { 42 + blobs = append(blobs, *ext.Thumb) 43 + } 44 + } 45 + if post.Embed != nil && post.Embed.EmbedRecordWithMedia != nil { 46 + media := post.Embed.EmbedRecordWithMedia.Media 47 + if media != nil && media.EmbedImages != nil { 48 + for _, eii := range media.EmbedImages.Images { 49 + if eii.Image != nil { 50 + blobs = append(blobs, *eii.Image) 51 + } 52 + } 53 + } 54 + if media != nil && media.EmbedExternal != nil { 55 + ext := media.EmbedExternal.External 56 + if ext != nil && ext.Thumb != nil { 57 + blobs = append(blobs, *ext.Thumb) 58 + } 59 + } 60 + } 61 + case "app.bsky.actor.profile": 62 + profile, ok := c.RecordOp.Value.(*appbsky.ActorProfile) 63 + if !ok { 64 + return nil, fmt.Errorf("mismatch between collection (%s) and type", c.RecordOp.Collection) 65 + } 66 + if profile.Avatar != nil { 67 + blobs = append(blobs, *profile.Avatar) 68 + } 69 + if profile.Banner != nil { 70 + blobs = append(blobs, *profile.Banner) 71 + } 72 + case "app.bsky.graph.list": 73 + list, ok := c.RecordOp.Value.(*appbsky.GraphList) 74 + if !ok { 75 + return nil, fmt.Errorf("mismatch between collection (%s) and type", c.RecordOp.Collection) 76 + } 77 + if list.Avatar != nil { 78 + blobs = append(blobs, *list.Avatar) 79 + } 80 + case "app.bsky.feed.generator": 81 + generator, ok := c.RecordOp.Value.(*appbsky.FeedGenerator) 82 + if !ok { 83 + return nil, fmt.Errorf("mismatch between collection (%s) and type", c.RecordOp.Collection) 84 + } 85 + if generator.Avatar != nil { 86 + blobs = append(blobs, *generator.Avatar) 87 + } 88 + } 89 + return blobs, nil 90 + } 91 + 92 + func fetchBlob(c *RecordContext, blob lexutil.LexBlob) ([]byte, error) { 93 + 94 + var blobBytes []byte 95 + 96 + // TODO: more robust way to write this? 97 + xrpcURL := fmt.Sprintf("%s/xrpc/com.atproto.sync.getBlob?did=%s&cid=%s", c.Account.Identity.PDSEndpoint(), c.Account.Identity.DID, blob.Ref) 98 + 99 + resp, err := http.Get(xrpcURL) 100 + if err != nil { 101 + return nil, err 102 + } 103 + defer resp.Body.Close() 104 + 105 + if resp.StatusCode != 200 { 106 + return nil, fmt.Errorf("failed to fetch blob from PDS. did=%s cid=%s statusCode=%d", c.Account.Identity.DID, blob.Ref, resp.StatusCode) 107 + } 108 + 109 + blobBytes, err = io.ReadAll(resp.Body) 110 + if err != nil { 111 + return nil, err 112 + } 113 + 114 + return blobBytes, nil 115 + }
+4
automod/engine/context.go
··· 182 182 func (c *RecordContext) TakedownRecord() { 183 183 c.effects.TakedownRecord() 184 184 } 185 + 186 + func (c *RecordContext) TakedownBlob(cid string) { 187 + c.effects.TakedownBlob(cid) 188 + }
+7
automod/engine/effects.go
··· 49 49 RecordReports []ModReport 50 50 // Same as "AccountTakedown", but at record-level 51 51 RecordTakedown bool 52 + // Set of Blob CIDs to takedown (eg, purge from CDN) when doing a record takedown 53 + BlobTakedowns []string 52 54 } 53 55 54 56 // Enqueues the named counter to be incremented at the end of all rule processing. Will automatically increment for all time periods. ··· 117 119 func (e *Effects) TakedownRecord() { 118 120 e.RecordTakedown = true 119 121 } 122 + 123 + // Enqueues the blob CID to be taken down (aka, CDN purge) as part of any record takedown 124 + func (e *Effects) TakedownBlob(cid string) { 125 + e.BlobTakedowns = append(e.BlobTakedowns, cid) 126 + }
+10
automod/engine/engine.go
··· 5 5 "fmt" 6 6 "log/slog" 7 7 "strings" 8 + "time" 8 9 9 10 "github.com/bluesky-social/indigo/atproto/identity" 10 11 "github.com/bluesky-social/indigo/atproto/syntax" ··· 15 16 "github.com/bluesky-social/indigo/xrpc" 16 17 ) 17 18 19 + const ( 20 + recordEventTimeout = 20 * time.Second 21 + identityEventTimeout = 10 * time.Second 22 + ) 23 + 18 24 // runtime for executing rules, managing state, and recording moderation actions. 19 25 // 20 26 // NOTE: careful when initializing: several fields must not be nil or zero, even though they are pointer type. ··· 40 46 eng.Logger.Error("automod event execution exception", "err", r, "did", did, "type", typ) 41 47 } 42 48 }() 49 + ctx, cancel := context.WithTimeout(ctx, identityEventTimeout) 50 + defer cancel() 43 51 44 52 ident, err := eng.Directory.LookupDID(ctx, did) 45 53 if err != nil { ··· 75 83 eng.Logger.Error("automod event execution exception", "err", r, "did", op.DID, "collection", op.Collection, "rkey", op.RecordKey) 76 84 } 77 85 }() 86 + ctx, cancel := context.WithTimeout(ctx, recordEventTimeout) 87 + defer cancel() 78 88 79 89 if err := op.Validate(); err != nil { 80 90 return fmt.Errorf("bad record op: %w", err)
+9 -8
automod/engine/persist.go
··· 61 61 if anyModActions && eng.SlackWebhookURL != "" { 62 62 msg := slackBody("⚠️ Automod Account Action ⚠️\n", c.Account, newLabels, newFlags, newReports, newTakedown) 63 63 if err := eng.SendSlackMsg(ctx, msg); err != nil { 64 - eng.Logger.Error("sending slack webhook", "err", err) 64 + c.Logger.Error("sending slack webhook", "err", err) 65 65 } 66 66 } 67 67 ··· 78 78 xrpcc := eng.AdminClient 79 79 80 80 if len(newLabels) > 0 { 81 - eng.Logger.Info("labeling record", "newLabels", newLabels) 81 + c.Logger.Info("labeling record", "newLabels", newLabels) 82 82 comment := "automod" 83 83 _, err := comatproto.AdminEmitModerationEvent(ctx, xrpcc, &comatproto.AdminEmitModerationEvent_Input{ 84 84 CreatedBy: xrpcc.Auth.Did, ··· 113 113 } 114 114 115 115 if newTakedown { 116 - eng.Logger.Warn("account-takedown") 116 + c.Logger.Warn("account-takedown") 117 117 comment := "automod" 118 118 _, err := comatproto.AdminEmitModerationEvent(ctx, xrpcc, &comatproto.AdminEmitModerationEvent_Input{ 119 119 CreatedBy: xrpcc.Auth.Did, ··· 168 168 msg := slackBody("⚠️ Automod Record Action ⚠️\n", c.Account, newLabels, newFlags, newReports, newTakedown) 169 169 msg += fmt.Sprintf("`%s`\n", atURI) 170 170 if err := eng.SendSlackMsg(ctx, msg); err != nil { 171 - eng.Logger.Error("sending slack webhook", "err", err) 171 + c.Logger.Error("sending slack webhook", "err", err) 172 172 } 173 173 } 174 174 } ··· 188 188 } 189 189 190 190 if c.RecordOp.CID == nil { 191 - eng.Logger.Warn("skipping record actions because CID is nil, can't construct strong ref") 191 + c.Logger.Warn("skipping record actions because CID is nil, can't construct strong ref") 192 192 return nil 193 193 } 194 194 cid := *c.RecordOp.CID ··· 199 199 200 200 xrpcc := eng.AdminClient 201 201 if len(newLabels) > 0 { 202 - eng.Logger.Info("labeling record", "newLabels", newLabels) 202 + c.Logger.Info("labeling record", "newLabels", newLabels) 203 203 comment := "automod" 204 204 _, err := comatproto.AdminEmitModerationEvent(ctx, xrpcc, &comatproto.AdminEmitModerationEvent_Input{ 205 205 CreatedBy: xrpcc.Auth.Did, ··· 220 220 } 221 221 222 222 for _, mr := range newReports { 223 - eng.Logger.Info("reporting record", "reasonType", mr.ReasonType, "comment", mr.Comment) 223 + c.Logger.Info("reporting record", "reasonType", mr.ReasonType, "comment", mr.Comment) 224 224 _, err := comatproto.ModerationCreateReport(ctx, xrpcc, &comatproto.ModerationCreateReport_Input{ 225 225 ReasonType: &mr.ReasonType, 226 226 Reason: &mr.Comment, ··· 233 233 } 234 234 } 235 235 if newTakedown { 236 - eng.Logger.Warn("record-takedown") 236 + c.Logger.Warn("record-takedown") 237 237 comment := "automod" 238 238 _, err := comatproto.AdminEmitModerationEvent(ctx, xrpcc, &comatproto.AdminEmitModerationEvent_Input{ 239 239 CreatedBy: xrpcc.Auth.Did, ··· 245 245 Subject: &comatproto.AdminEmitModerationEvent_Input_Subject{ 246 246 RepoStrongRef: &strongRef, 247 247 }, 248 + SubjectBlobCids: dedupeStrings(c.effects.BlobTakedowns), 248 249 }) 249 250 if err != nil { 250 251 return err
+82
automod/engine/ruleset.go
··· 2 2 3 3 import ( 4 4 "fmt" 5 + "sync" 5 6 6 7 appbsky "github.com/bluesky-social/indigo/api/bsky" 8 + lexutil "github.com/bluesky-social/indigo/lex/util" 7 9 ) 8 10 9 11 type RuleSet struct { ··· 12 14 RecordRules []RecordRuleFunc 13 15 RecordDeleteRules []RecordRuleFunc 14 16 IdentityRules []IdentityRuleFunc 17 + BlobRules []BlobRuleFunc 15 18 } 16 19 17 20 func (r *RuleSet) CallRecordRules(c *RecordContext) error { ··· 47 50 } 48 51 } 49 52 } 53 + // then blob rules, if any 54 + if len(r.BlobRules) == 0 { 55 + return nil 56 + } 57 + err := r.fetchAndProcessBlobs(c) 58 + if err != nil { 59 + return err 60 + } 61 + 50 62 return nil 51 63 } 52 64 ··· 69 81 } 70 82 return nil 71 83 } 84 + 85 + // high-level helper for fetching and processing blobs concurrently 86 + func (r *RuleSet) fetchAndProcessBlobs(c *RecordContext) error { 87 + 88 + blobs, err := c.Blobs() 89 + if err != nil { 90 + // TODO: should this really return error, or just log? 91 + return err 92 + } 93 + if len(blobs) == 0 { 94 + return nil 95 + } 96 + 97 + errChan := make(chan error, len(blobs)) 98 + var wg sync.WaitGroup 99 + for _, blob := range blobs { 100 + wg.Add(1) 101 + go func(blob lexutil.LexBlob) { 102 + defer wg.Done() 103 + data, err := fetchBlob(c, blob) 104 + if err != nil { 105 + errChan <- err 106 + return 107 + } 108 + err = r.processBlob(c, blob, data) 109 + if err != nil { 110 + errChan <- err 111 + return 112 + } 113 + }(blob) 114 + 115 + } 116 + wg.Wait() 117 + close(errChan) 118 + 119 + // check for errors 120 + for err := range errChan { 121 + if err != nil { 122 + return err 123 + } 124 + } 125 + return nil 126 + } 127 + 128 + func (r *RuleSet) processBlob(c *RecordContext, blob lexutil.LexBlob, data []byte) error { 129 + errChan := make(chan error, len(r.BlobRules)) 130 + var wg sync.WaitGroup 131 + for _, f := range r.BlobRules { 132 + wg.Add(1) 133 + go func(brf BlobRuleFunc) { 134 + defer wg.Done() 135 + err := brf(c, blob, data) 136 + if err != nil { 137 + errChan <- err 138 + return 139 + } 140 + }(f) 141 + } 142 + 143 + wg.Wait() 144 + close(errChan) 145 + 146 + // check for errors 147 + for err := range errChan { 148 + if err != nil { 149 + return err 150 + } 151 + } 152 + return nil 153 + }
+2
automod/engine/ruletypes.go
··· 2 2 3 3 import ( 4 4 appbsky "github.com/bluesky-social/indigo/api/bsky" 5 + lexutil "github.com/bluesky-social/indigo/lex/util" 5 6 ) 6 7 7 8 type IdentityRuleFunc = func(c *AccountContext) error 8 9 type RecordRuleFunc = func(c *RecordContext) error 9 10 type PostRuleFunc = func(c *RecordContext, post *appbsky.FeedPost) error 10 11 type ProfileRuleFunc = func(c *RecordContext, profile *appbsky.ActorProfile) error 12 + type BlobRuleFunc = func(c *RecordContext, blob lexutil.LexBlob, data []byte) error
+1
automod/pkg.go
··· 17 17 type RecordRuleFunc = engine.RecordRuleFunc 18 18 type PostRuleFunc = engine.PostRuleFunc 19 19 type ProfileRuleFunc = engine.ProfileRuleFunc 20 + type BlobRuleFunc = engine.BlobRuleFunc 20 21 21 22 var ( 22 23 ReportReasonSpam = engine.ReportReasonSpam
+3
automod/rules/all.go
··· 35 35 IdentityRules: []automod.IdentityRuleFunc{ 36 36 NewAccountRule, 37 37 }, 38 + BlobRules: []automod.BlobRuleFunc{ 39 + //BlobVerifyRule, 40 + }, 38 41 } 39 42 return rules 40 43 }
+24
automod/rules/blobs.go
··· 1 + package rules 2 + 3 + import ( 4 + "github.com/bluesky-social/indigo/automod" 5 + lexutil "github.com/bluesky-social/indigo/lex/util" 6 + ) 7 + 8 + var _ automod.BlobRuleFunc = BlobVerifyRule 9 + 10 + func BlobVerifyRule(c *automod.RecordContext, blob lexutil.LexBlob, data []byte) error { 11 + 12 + if len(data) == 0 { 13 + c.AddRecordFlag("empty-blob") 14 + } 15 + 16 + // check size 17 + if blob.Size >= 0 && int64(len(data)) != blob.Size { 18 + c.AddRecordFlag("invalid-blob") 19 + } else { 20 + c.Logger.Info("blob checks out", "cid", blob.Ref, "size", blob.Size, "mimetype", blob.MimeType) 21 + } 22 + 23 + return nil 24 + }
+42
automod/visual/abyss_api.go
··· 1 + package visual 2 + 3 + import ( 4 + lexutil "github.com/bluesky-social/indigo/lex/util" 5 + ) 6 + 7 + type AbyssScanResp struct { 8 + Blob *lexutil.LexBlob `json:"blob"` 9 + Match *AbyssMatchResult `json:"match,omitempty"` 10 + Classify *AbyssClassifyResult `json:"classify,omitempty"` 11 + Review *AbyssReviewState `json:"review,omitempty"` 12 + } 13 + 14 + type AbyssMatchResult struct { 15 + Status string `json:"status"` 16 + Hits []AbyssMatchHit `json:"hits"` 17 + } 18 + 19 + type AbyssMatchHit struct { 20 + HashType string `json:"hashType,omitempty"` 21 + HashValue string `json:"hashValue,omitempty"` 22 + Label string `json:"label,omitempty"` 23 + // TODO: Corpus 24 + } 25 + 26 + type AbyssClassifyResult struct { 27 + // TODO 28 + } 29 + 30 + type AbyssReviewState struct { 31 + State string `json:"state,omitempty"` 32 + TicketID string `json:"ticketId,omitempty"` 33 + } 34 + 35 + func (amr *AbyssMatchResult) IsAbuseMatch() bool { 36 + for _, hit := range amr.Hits { 37 + if hit.Label == "csam" || hit.Label == "csem" { 38 + return true 39 + } 40 + } 41 + return false 42 + }
+75
automod/visual/abyss_client.go
··· 1 + package visual 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "encoding/json" 7 + "fmt" 8 + "io" 9 + "log/slog" 10 + "net/http" 11 + 12 + lexutil "github.com/bluesky-social/indigo/lex/util" 13 + "github.com/bluesky-social/indigo/util" 14 + 15 + "github.com/carlmjohnson/versioninfo" 16 + ) 17 + 18 + type AbyssClient struct { 19 + Client http.Client 20 + Host string 21 + Password string 22 + } 23 + 24 + func NewAbyssClient(host, password string) AbyssClient { 25 + return AbyssClient{ 26 + Client: *util.RobustHTTPClient(), 27 + Host: host, 28 + Password: password, 29 + } 30 + } 31 + 32 + func (ac *AbyssClient) ScanBlob(ctx context.Context, blob lexutil.LexBlob, blobBytes []byte, params map[string]string) (*AbyssScanResp, error) { 33 + 34 + slog.Info("sending blob to abyss", "cid", blob.Ref.String(), "mimetype", blob.MimeType, "size", len(blobBytes)) 35 + 36 + body := bytes.NewBuffer(blobBytes) 37 + req, err := http.NewRequest("POST", ac.Host+"/xrpc/com.atproto.unspecced.scanBlob", body) 38 + if err != nil { 39 + return nil, err 40 + } 41 + 42 + q := req.URL.Query() 43 + for k, v := range params { 44 + q.Add(k, v) 45 + } 46 + req.URL.RawQuery = q.Encode() 47 + 48 + req.SetBasicAuth("admin", ac.Password) 49 + req.Header.Add("Content-Type", blob.MimeType) 50 + req.Header.Add("Content-Length", fmt.Sprintf("%d", blob.Size)) 51 + req.Header.Set("Accept", "application/json") 52 + req.Header.Set("User-Agent", "indigo-automod/"+versioninfo.Short()) 53 + 54 + req = req.WithContext(ctx) 55 + res, err := ac.Client.Do(req) 56 + if err != nil { 57 + return nil, fmt.Errorf("abyss request failed: %v", err) 58 + } 59 + defer res.Body.Close() 60 + if res.StatusCode != 200 { 61 + return nil, fmt.Errorf("abyss request failed statusCode=%d", res.StatusCode) 62 + } 63 + 64 + respBytes, err := io.ReadAll(res.Body) 65 + if err != nil { 66 + return nil, fmt.Errorf("failed to read abyss resp body: %v", err) 67 + } 68 + 69 + var respObj AbyssScanResp 70 + if err := json.Unmarshal(respBytes, &respObj); err != nil { 71 + return nil, fmt.Errorf("failed to parse abyss resp JSON: %v", err) 72 + } 73 + slog.Info("abyss-scan-response", "cid", blob.Ref.String(), "obj", respObj) 74 + return &respObj, nil 75 + }
+46
automod/visual/abyss_rule.go
··· 1 + package visual 2 + 3 + import ( 4 + "strings" 5 + 6 + "github.com/bluesky-social/indigo/automod" 7 + lexutil "github.com/bluesky-social/indigo/lex/util" 8 + ) 9 + 10 + func (ac *AbyssClient) AbyssScanBlobRule(c *automod.RecordContext, blob lexutil.LexBlob, data []byte) error { 11 + 12 + if !strings.HasPrefix(blob.MimeType, "image/") { 13 + return nil 14 + } 15 + 16 + params := make(map[string]string) 17 + params["did"] = c.Account.Identity.DID.String() 18 + if !c.Account.Identity.Handle.IsInvalidHandle() { 19 + params["handle"] = c.Account.Identity.Handle.String() 20 + } 21 + if c.Account.Private != nil && c.Account.Private.Email != "" { 22 + params["accountEmail"] = c.Account.Private.Email 23 + } 24 + params["uri"] = c.RecordOp.ATURI().String() 25 + 26 + resp, err := ac.ScanBlob(c.Ctx, blob, data, params) 27 + if err != nil { 28 + return err 29 + } 30 + 31 + if resp.Match == nil || resp.Match.Status != "success" { 32 + // TODO: should this return an error, or just log? 33 + c.Logger.Error("abyss blob scan failed", "cid", blob.Ref.String()) 34 + return nil 35 + } 36 + 37 + if resp.Match.IsAbuseMatch() { 38 + c.Logger.Warn("abyss blob match", "cid", blob.Ref.String()) 39 + c.AddRecordFlag("abyss-match") 40 + c.TakedownRecord() 41 + // purge blob as part of record takedown 42 + c.TakedownBlob(blob.Ref.String()) 43 + } 44 + 45 + return nil 46 + }
+2
automod/visual/doc.go
··· 1 + // automod helpers for visual content (image blobs) 2 + package visual
+211
automod/visual/hiveai_client.go
··· 1 + package visual 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "encoding/json" 7 + "fmt" 8 + "io" 9 + "log/slog" 10 + "mime/multipart" 11 + "net/http" 12 + 13 + lexutil "github.com/bluesky-social/indigo/lex/util" 14 + "github.com/bluesky-social/indigo/util" 15 + 16 + "github.com/carlmjohnson/versioninfo" 17 + ) 18 + 19 + type HiveAIClient struct { 20 + Client http.Client 21 + ApiToken string 22 + } 23 + 24 + // schema: https://docs.thehive.ai/reference/classification 25 + type HiveAIResp struct { 26 + Status []HiveAIResp_Status `json:"status"` 27 + } 28 + 29 + type HiveAIResp_Status struct { 30 + Response HiveAIResp_Response `json:"response"` 31 + } 32 + 33 + type HiveAIResp_Response struct { 34 + Output []HiveAIResp_Out `json:"output"` 35 + } 36 + 37 + type HiveAIResp_Out struct { 38 + Time float64 `json:"time"` 39 + Classes []HiveAIResp_Class `json:"classes"` 40 + } 41 + 42 + type HiveAIResp_Class struct { 43 + Class string `json:"class"` 44 + Score float64 `json:"score"` 45 + } 46 + 47 + func NewHiveAIClient(token string) HiveAIClient { 48 + return HiveAIClient{ 49 + Client: *util.RobustHTTPClient(), 50 + ApiToken: token, 51 + } 52 + } 53 + 54 + // Simple direct mappings from individual classes to to labels 55 + // 56 + // hive gore and violence: https://docs.thehive.ai/docs/class-descriptions-violence-gore 57 + func summarizeSimpleLabels(cl []HiveAIResp_Class) []string { 58 + var labels []string 59 + 60 + for _, cls := range cl { 61 + if cls.Class == "very_bloody" && cls.Score >= 0.90 { 62 + labels = append(labels, "gore") 63 + } 64 + if cls.Class == "human_corpse" && cls.Score >= 0.90 { 65 + labels = append(labels, "corpse") 66 + } 67 + if cls.Class == "hanging" && cls.Score >= 0.90 { 68 + labels = append(labels, "corpse") 69 + } 70 + if cls.Class == "yes_self_harm" && cls.Score >= 0.96 { 71 + labels = append(labels, "self-harm") 72 + } 73 + } 74 + return labels 75 + } 76 + 77 + // Matches only one (or none) of: porn, sexual, nudity 78 + // 79 + // porn: sexual and nudity. including both explicit activity or full-frontal and suggestive/intent 80 + // sexual: sexually suggestive, not explicit; may include some forms of nudity 81 + // nudity: non-sexual nudity (eg, artistic, possibly some photographic) 82 + // 83 + // hive docs/definitions: https://docs.thehive.ai/docs/sexual-content 84 + func summarizeSexualLabels(cl []HiveAIResp_Class) string { 85 + 86 + scores := make(map[string]float64) 87 + for _, cls := range cl { 88 + scores[cls.Class] = cls.Score 89 + } 90 + 91 + // first check if porn... 92 + for _, pornClass := range []string{"yes_sexual_activity", "animal_genitalia_and_human", "yes_realistic_nsfw"} { 93 + if scores[pornClass] >= 0.9 { 94 + return "porn" 95 + } 96 + } 97 + if scores["general_nsfw"] >= 0.9 { 98 + // special case for some anime examples 99 + if scores["animated_animal_genitalia"] >= 0.5 { 100 + return "porn" 101 + } 102 + 103 + // special case for some pornographic/explicit classic drawings 104 + if scores["yes_undressed"] >= 0.9 && scores["yes_sexual_activity"] >= 0.9 { 105 + return "porn" 106 + } 107 + } 108 + 109 + // then check for sexual suggestive (which may include nudity)... 110 + for _, sexualClass := range []string{"yes_sexual_intent", "yes_sex_toy"} { 111 + if scores[sexualClass] >= 0.9 { 112 + return "sexual" 113 + } 114 + } 115 + if scores["yes_undressed"] >= 0.9 { 116 + // special case for bondage examples 117 + if scores["yes_sex_toy"] > 0.75 { 118 + return "sexual" 119 + } 120 + } 121 + 122 + // then non-sexual nudity... 123 + for _, nudityClass := range []string{"yes_male_nudity", "yes_female_nudity", "yes_undressed"} { 124 + if scores[nudityClass] >= 0.9 { 125 + return "nudity" 126 + } 127 + } 128 + 129 + // then finally flag remaining "underwear" images in to sexually suggestive 130 + // (after non-sexual content already labeled above) 131 + for _, underwearClass := range []string{"yes_male_underwear", "yes_female_underwear"} { 132 + if scores[underwearClass] >= 0.9 { 133 + return "sexual" 134 + } 135 + } 136 + 137 + return "" 138 + } 139 + 140 + func (resp *HiveAIResp) SummarizeLabels() []string { 141 + var labels []string 142 + 143 + for _, status := range resp.Status { 144 + for _, out := range status.Response.Output { 145 + simple := summarizeSimpleLabels(out.Classes) 146 + if len(simple) > 0 { 147 + labels = append(labels, simple...) 148 + } 149 + 150 + sexual := summarizeSexualLabels(out.Classes) 151 + if sexual != "" { 152 + labels = append(labels, sexual) 153 + } 154 + } 155 + } 156 + 157 + return labels 158 + } 159 + 160 + func (hal *HiveAIClient) LabelBlob(ctx context.Context, blob lexutil.LexBlob, blobBytes []byte) ([]string, error) { 161 + 162 + slog.Info("sending blob to Hive AI", "cid", blob.Ref.String(), "mimetype", blob.MimeType, "size", len(blobBytes)) 163 + 164 + // generic HTTP form file upload, then parse the response JSON 165 + body := &bytes.Buffer{} 166 + writer := multipart.NewWriter(body) 167 + part, err := writer.CreateFormFile("media", blob.Ref.String()) 168 + if err != nil { 169 + return nil, err 170 + } 171 + _, err = part.Write(blobBytes) 172 + if err != nil { 173 + return nil, err 174 + } 175 + err = writer.Close() 176 + if err != nil { 177 + return nil, err 178 + } 179 + 180 + req, err := http.NewRequest("POST", "https://api.thehive.ai/api/v2/task/sync", body) 181 + if err != nil { 182 + return nil, err 183 + } 184 + 185 + req.Header.Set("Authorization", fmt.Sprintf("Token %s", hal.ApiToken)) 186 + req.Header.Add("Content-Type", writer.FormDataContentType()) 187 + req.Header.Set("Accept", "application/json") 188 + req.Header.Set("User-Agent", "indigo-automod/"+versioninfo.Short()) 189 + 190 + req = req.WithContext(ctx) 191 + res, err := hal.Client.Do(req) 192 + if err != nil { 193 + return nil, fmt.Errorf("HiveAI request failed: %v", err) 194 + } 195 + defer res.Body.Close() 196 + if res.StatusCode != 200 { 197 + return nil, fmt.Errorf("HiveAI request failed statusCode=%d", res.StatusCode) 198 + } 199 + 200 + respBytes, err := io.ReadAll(res.Body) 201 + if err != nil { 202 + return nil, fmt.Errorf("failed to read HiveAI resp body: %v", err) 203 + } 204 + 205 + var respObj HiveAIResp 206 + if err := json.Unmarshal(respBytes, &respObj); err != nil { 207 + return nil, fmt.Errorf("failed to parse HiveAI resp JSON: %v", err) 208 + } 209 + slog.Info("hive-ai-response", "cid", blob.Ref.String(), "obj", respObj) 210 + return respObj.SummarizeLabels(), nil 211 + }
+26
automod/visual/hiveai_rule.go
··· 1 + package visual 2 + 3 + import ( 4 + "strings" 5 + 6 + "github.com/bluesky-social/indigo/automod" 7 + lexutil "github.com/bluesky-social/indigo/lex/util" 8 + ) 9 + 10 + func (hal *HiveAIClient) HiveLabelBlobRule(c *automod.RecordContext, blob lexutil.LexBlob, data []byte) error { 11 + 12 + if !strings.HasPrefix(blob.MimeType, "image/") { 13 + return nil 14 + } 15 + 16 + labels, err := hal.LabelBlob(c.Ctx, blob, data) 17 + if err != nil { 18 + return err 19 + } 20 + 21 + for _, l := range labels { 22 + c.AddRecordLabel(l) 23 + } 24 + 25 + return nil 26 + }
+42
automod/visual/hiveai_test.go
··· 1 + package visual 2 + 3 + import ( 4 + "encoding/json" 5 + "io" 6 + "os" 7 + "reflect" 8 + "testing" 9 + ) 10 + 11 + func TestHiveParse(t *testing.T) { 12 + file, err := os.Open("testdata/hiveai_resp_example.json") 13 + if err != nil { 14 + t.Fatal(err) 15 + } 16 + 17 + respBytes, err := io.ReadAll(file) 18 + if err != nil { 19 + t.Fatal(err) 20 + } 21 + 22 + var respObj HiveAIResp 23 + if err := json.Unmarshal(respBytes, &respObj); err != nil { 24 + t.Fatal(err) 25 + } 26 + 27 + classes := respObj.Status[0].Response.Output[0].Classes 28 + if len(classes) <= 10 { 29 + t.Fatal("didn't get expected class count") 30 + } 31 + for _, c := range classes { 32 + if c.Class == "" || c.Score == 0.0 { 33 + t.Fatal("got null/empty class in resp") 34 + } 35 + } 36 + 37 + labels := respObj.SummarizeLabels() 38 + expected := []string{"porn"} 39 + if !reflect.DeepEqual(labels, expected) { 40 + t.Fatal("didn't summarize to expected labels") 41 + } 42 + }
+401
automod/visual/testdata/hiveai_resp_example.json
··· 1 + { 2 + "id": "02122580-c37f-11ed-81d2-000000000000", 3 + "code": 200, 4 + "project_id": 12345, 5 + "user_id": 12345, 6 + "created_on": "2023-03-15T22:16:18.408Z", 7 + "status": [ 8 + { 9 + "status": { 10 + "code": "0", 11 + "message": "SUCCESS" 12 + }, 13 + "response": { 14 + "input": { 15 + "id": "02122580-c37f-11ed-81d2-000000000000", 16 + "charge": 0.003, 17 + "model": "mod55_dense", 18 + "model_version": 1, 19 + "model_type": "CATEGORIZATION", 20 + "created_on": "2023-03-15T22:16:18.136Z", 21 + "media": { 22 + "url": null, 23 + "filename": "bafkreiam7k6mvkyuoybq4ynhljvj5xa75sdbhjbolzjf5j2udx7vj5gnsy", 24 + "type": "PHOTO", 25 + "mime_type": "jpeg", 26 + "mimetype": "image/jpeg", 27 + "width": 800, 28 + "height": 800, 29 + "num_frames": 1, 30 + "duration": 0 31 + }, 32 + "user_id": 12345, 33 + "project_id": 12345, 34 + "config_version": 1, 35 + "config_tag": "default" 36 + }, 37 + "output": [ 38 + { 39 + "time": 0, 40 + "classes": [ 41 + { 42 + "class": "general_not_nsfw_not_suggestive", 43 + "score": 0.9998097218132356 44 + }, 45 + { 46 + "class": "general_nsfw", 47 + "score": 8.857344804177162e-5 48 + }, 49 + { 50 + "class": "general_suggestive", 51 + "score": 0.00010170473872266839 52 + }, 53 + { 54 + "class": "no_female_underwear", 55 + "score": 0.9999923079040384 56 + }, 57 + { 58 + "class": "yes_female_underwear", 59 + "score": 7.692095961599136e-6 60 + }, 61 + { 62 + "class": "no_male_underwear", 63 + "score": 0.9999984904867634 64 + }, 65 + { 66 + "class": "yes_male_underwear", 67 + "score": 1.5095132367094679e-6 68 + }, 69 + { 70 + "class": "no_sex_toy", 71 + "score": 0.9999970970762551 72 + }, 73 + { 74 + "class": "yes_sex_toy", 75 + "score": 2.9029237450490604e-6 76 + }, 77 + { 78 + "class": "no_female_nudity", 79 + "score": 0.9999739028909301 80 + }, 81 + { 82 + "class": "yes_female_nudity", 83 + "score": 2.60971090699536e-5 84 + }, 85 + { 86 + "class": "no_male_nudity", 87 + "score": 0.9999711373083747 88 + }, 89 + { 90 + "class": "yes_male_nudity", 91 + "score": 2.8862691625255323e-5 92 + }, 93 + { 94 + "class": "no_female_swimwear", 95 + "score": 0.9999917609899659 96 + }, 97 + { 98 + "class": "yes_female_swimwear", 99 + "score": 8.239010034025379e-6 100 + }, 101 + { 102 + "class": "no_male_shirtless", 103 + "score": 0.9999583350744331 104 + }, 105 + { 106 + "class": "yes_male_shirtless", 107 + "score": 4.166492556688088e-5 108 + }, 109 + { 110 + "class": "no_text", 111 + "score": 0.9958378716447616 112 + }, 113 + { 114 + "class": "text", 115 + "score": 0.0041621283552384265 116 + }, 117 + { 118 + "class": "animated", 119 + "score": 0.46755478950048235 120 + }, 121 + { 122 + "class": "hybrid", 123 + "score": 0.0011440363434524984 124 + }, 125 + { 126 + "class": "natural", 127 + "score": 0.5313011741560651 128 + }, 129 + { 130 + "class": "animated_gun", 131 + "score": 2.0713000782979496e-5 132 + }, 133 + { 134 + "class": "gun_in_hand", 135 + "score": 1.5844730446534659e-6 136 + }, 137 + { 138 + "class": "gun_not_in_hand", 139 + "score": 1.0338973818006654e-6 140 + }, 141 + { 142 + "class": "no_gun", 143 + "score": 0.9999766686287906 144 + }, 145 + { 146 + "class": "culinary_knife_in_hand", 147 + "score": 3.8063500083369785e-6 148 + }, 149 + { 150 + "class": "culinary_knife_not_in_hand", 151 + "score": 7.94057948996249e-7 152 + }, 153 + { 154 + "class": "knife_in_hand", 155 + "score": 4.5578955723278505e-7 156 + }, 157 + { 158 + "class": "knife_not_in_hand", 159 + "score": 3.842124714748908e-7 160 + }, 161 + { 162 + "class": "no_knife", 163 + "score": 0.999994559590014 164 + }, 165 + { 166 + "class": "a_little_bloody", 167 + "score": 2.1317745626539786e-7 168 + }, 169 + { 170 + "class": "no_blood", 171 + "score": 0.9999793341236429 172 + }, 173 + { 174 + "class": "other_blood", 175 + "score": 2.0322054269591763e-5 176 + }, 177 + { 178 + "class": "very_bloody", 179 + "score": 1.306446309561673e-7 180 + }, 181 + { 182 + "class": "no_pills", 183 + "score": 0.9999989592376954 184 + }, 185 + { 186 + "class": "yes_pills", 187 + "score": 1.0407623044588633e-6 188 + }, 189 + { 190 + "class": "no_smoking", 191 + "score": 0.9999939101969173 192 + }, 193 + { 194 + "class": "yes_smoking", 195 + "score": 6.089803082758281e-6 196 + }, 197 + { 198 + "class": "illicit_injectables", 199 + "score": 6.925695592003094e-7 200 + }, 201 + { 202 + "class": "medical_injectables", 203 + "score": 8.587808234452378e-7 204 + }, 205 + { 206 + "class": "no_injectables", 207 + "score": 0.9999984486496174 208 + }, 209 + { 210 + "class": "no_nazi", 211 + "score": 0.9999987449628097 212 + }, 213 + { 214 + "class": "yes_nazi", 215 + "score": 1.2550371902234279e-6 216 + }, 217 + { 218 + "class": "no_kkk", 219 + "score": 0.999999762417549 220 + }, 221 + { 222 + "class": "yes_kkk", 223 + "score": 2.3758245111050425e-7 224 + }, 225 + { 226 + "class": "no_middle_finger", 227 + "score": 0.9999881515231847 228 + }, 229 + { 230 + "class": "yes_middle_finger", 231 + "score": 1.184847681536747e-5 232 + }, 233 + { 234 + "class": "no_terrorist", 235 + "score": 0.9999998870793229 236 + }, 237 + { 238 + "class": "yes_terrorist", 239 + "score": 1.1292067715380635e-7 240 + }, 241 + { 242 + "class": "no_overlay_text", 243 + "score": 0.9996453363440359 244 + }, 245 + { 246 + "class": "yes_overlay_text", 247 + "score": 0.0003546636559640924 248 + }, 249 + { 250 + "class": "no_sexual_activity", 251 + "score": 0.9999563580374798 252 + }, 253 + { 254 + "class": "yes_sexual_activity", 255 + "score": 0.99, 256 + "realScore": 4.364196252012032e-5 257 + }, 258 + { 259 + "class": "hanging", 260 + "score": 3.6435135762510905e-7 261 + }, 262 + { 263 + "class": "no_hanging_no_noose", 264 + "score": 0.9999980779196416 265 + }, 266 + { 267 + "class": "noose", 268 + "score": 1.5577290007796094e-6 269 + }, 270 + { 271 + "class": "no_realistic_nsfw", 272 + "score": 0.9999944341007805 273 + }, 274 + { 275 + "class": "yes_realistic_nsfw", 276 + "score": 5.565899219571182e-6 277 + }, 278 + { 279 + "class": "animated_corpse", 280 + "score": 5.276802046755426e-7 281 + }, 282 + { 283 + "class": "human_corpse", 284 + "score": 2.5449360984211012e-8 285 + }, 286 + { 287 + "class": "no_corpse", 288 + "score": 0.9999994468704343 289 + }, 290 + { 291 + "class": "no_self_harm", 292 + "score": 0.9999994515625507 293 + }, 294 + { 295 + "class": "yes_self_harm", 296 + "score": 5.484374493605692e-7 297 + }, 298 + { 299 + "class": "no_drawing", 300 + "score": 0.9978276028816608 301 + }, 302 + { 303 + "class": "yes_drawing", 304 + "score": 0.0021723971183392485 305 + }, 306 + { 307 + "class": "no_emaciated_body", 308 + "score": 0.9999998146500432 309 + }, 310 + { 311 + "class": "yes_emaciated_body", 312 + "score": 1.853499568724518e-7 313 + }, 314 + { 315 + "class": "no_child_present", 316 + "score": 0.9999970498515446 317 + }, 318 + { 319 + "class": "yes_child_present", 320 + "score": 2.950148455380443e-6 321 + }, 322 + { 323 + "class": "no_sexual_intent", 324 + "score": 0.9999963861546292 325 + }, 326 + { 327 + "class": "yes_sexual_intent", 328 + "score": 3.613845370766111e-6 329 + }, 330 + { 331 + "class": "animal_genitalia_and_human", 332 + "score": 2.255472023465222e-8 333 + }, 334 + { 335 + "class": "animal_genitalia_only", 336 + "score": 4.6783185199931176e-7 337 + }, 338 + { 339 + "class": "animated_animal_genitalia", 340 + "score": 6.707857419436447e-7 341 + }, 342 + { 343 + "class": "no_animal_genitalia", 344 + "score": 0.9999988388276858 345 + }, 346 + { 347 + "class": "no_gambling", 348 + "score": 0.9999960939687145 349 + }, 350 + { 351 + "class": "yes_gambling", 352 + "score": 3.906031285604864e-6 353 + }, 354 + { 355 + "class": "no_undressed", 356 + "score": 0.99999923356218 357 + }, 358 + { 359 + "class": "yes_undressed", 360 + "score": 7.664378199789045e-7 361 + }, 362 + { 363 + "class": "no_confederate", 364 + "score": 0.9999925456900376 365 + }, 366 + { 367 + "class": "yes_confederate", 368 + "score": 7.454309962453175e-6 369 + }, 370 + { 371 + "class": "animated_alcohol", 372 + "score": 1.8109949948066074e-6 373 + }, 374 + { 375 + "class": "no_alcohol", 376 + "score": 0.9999916620957963 377 + }, 378 + { 379 + "class": "yes_alcohol", 380 + "score": 5.88781463445443e-6 381 + }, 382 + { 383 + "class": "yes_drinking_alcohol", 384 + "score": 6.390945746578106e-7 385 + }, 386 + { 387 + "class": "no_religious_icon", 388 + "score": 0.9999862158580689 389 + }, 390 + { 391 + "class": "yes_religious_icon", 392 + "score": 1.3784141931119298e-5 393 + } 394 + ] 395 + } 396 + ] 397 + } 398 + } 399 + ], 400 + "from_cache": false 401 + }
+28
cmd/hepa/main.go
··· 93 93 Usage: "file path of JSON file containing static sets", 94 94 EnvVars: []string{"HEPA_SETS_JSON_PATH"}, 95 95 }, 96 + &cli.StringFlag{ 97 + Name: "hiveai-api-token", 98 + Usage: "API token for Hive AI image auto-labeling", 99 + EnvVars: []string{"HIVEAI_API_TOKEN"}, 100 + }, 101 + &cli.StringFlag{ 102 + Name: "abyss-host", 103 + Usage: "host for abusive image scanning API (scheme, host, port)", 104 + EnvVars: []string{"ABYSS_HOST"}, 105 + }, 106 + &cli.StringFlag{ 107 + Name: "abyss-password", 108 + Usage: "admin auth password for abyss API", 109 + EnvVars: []string{"ABYSS_PASSWORD"}, 110 + }, 111 + &cli.StringFlag{ 112 + Name: "ruleset", 113 + Usage: "which ruleset config to use: default, no-blobs, only-blobs", 114 + EnvVars: []string{"HEPA_RULESET"}, 115 + }, 96 116 } 97 117 98 118 app.Commands = []*cli.Command{ ··· 173 193 SetsFileJSON: cctx.String("sets-json-path"), 174 194 RedisURL: cctx.String("redis-url"), 175 195 SlackWebhookURL: cctx.String("slack-webhook-url"), 196 + HiveAPIToken: cctx.String("hiveai-api-token"), 197 + AbyssHost: cctx.String("abyss-host"), 198 + AbyssPassword: cctx.String("abyss-password"), 199 + RulesetName: cctx.String("ruleset"), 176 200 }, 177 201 ) 178 202 if err != nil { ··· 234 258 ModPassword: cctx.String("mod-password"), 235 259 SetsFileJSON: cctx.String("sets-json-path"), 236 260 RedisURL: cctx.String("redis-url"), 261 + HiveAPIToken: cctx.String("hiveai-api-token"), 262 + AbyssHost: cctx.String("abyss-host"), 263 + AbyssPassword: cctx.String("abyss-password"), 264 + RulesetName: cctx.String("ruleset"), 237 265 }, 238 266 ) 239 267 }
+33 -1
cmd/hepa/server.go
··· 17 17 "github.com/bluesky-social/indigo/automod/flagstore" 18 18 "github.com/bluesky-social/indigo/automod/rules" 19 19 "github.com/bluesky-social/indigo/automod/setstore" 20 + "github.com/bluesky-social/indigo/automod/visual" 20 21 "github.com/bluesky-social/indigo/util" 21 22 "github.com/bluesky-social/indigo/xrpc" 22 23 ··· 42 43 SetsFileJSON string 43 44 RedisURL string 44 45 SlackWebhookURL string 46 + HiveAPIToken string 47 + AbyssHost string 48 + AbyssPassword string 49 + RulesetName string 45 50 Logger *slog.Logger 46 51 } 47 52 ··· 130 135 flags = flagstore.NewMemFlagStore() 131 136 } 132 137 138 + extraBlobRules := []automod.BlobRuleFunc{} 139 + if config.HiveAPIToken != "" { 140 + logger.Info("configuring Hive AI image labeler") 141 + hc := visual.NewHiveAIClient(config.HiveAPIToken) 142 + extraBlobRules = append(extraBlobRules, hc.HiveLabelBlobRule) 143 + } 144 + 145 + if config.AbyssHost != "" && config.AbyssPassword != "" { 146 + logger.Info("configuring abyss abusive image scanning") 147 + ac := visual.NewAbyssClient(config.AbyssHost, config.AbyssPassword) 148 + extraBlobRules = append(extraBlobRules, ac.AbyssScanBlobRule) 149 + } 150 + 151 + var ruleset automod.RuleSet 152 + switch config.RulesetName { 153 + case "", "default": 154 + ruleset = rules.DefaultRules() 155 + ruleset.BlobRules = append(ruleset.BlobRules, extraBlobRules...) 156 + case "no-blobs": 157 + ruleset = rules.DefaultRules() 158 + ruleset.BlobRules = []automod.BlobRuleFunc{} 159 + case "only-blobs": 160 + ruleset.BlobRules = extraBlobRules 161 + default: 162 + return nil, fmt.Errorf("unknown ruleset config: %s", config.RulesetName) 163 + } 164 + 133 165 engine := automod.Engine{ 134 166 Logger: logger, 135 167 Directory: dir, ··· 137 169 Sets: sets, 138 170 Flags: flags, 139 171 Cache: cache, 140 - Rules: rules.DefaultRules(), 172 + Rules: ruleset, 141 173 AdminClient: xrpcc, 142 174 BskyClient: &xrpc.Client{ 143 175 Client: util.RobustHTTPClient(),