this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

some more work on debugging issues in event streams (#127)

authored by

Whyrusleeping and committed by
GitHub
8c8248dc 22c94f06

+247 -31
+3 -2
api/atproto/syncsubscribeRepos.go
··· 10 10 } 11 11 12 12 type SyncSubscribeRepos_Commit struct { 13 - Blobs []util.LexLink `json:"blobs" cborgen:"blobs"` 14 - Blocks util.LexBytes `json:"blocks" cborgen:"blocks"` 13 + Blobs []util.LexLink `json:"blobs" cborgen:"blobs"` 14 + // TODO: need to fix lexgen to make LexBytes 'omitempty'. This is currently being manually edited 15 + Blocks util.LexBytes `json:"blocks,omitempty" cborgen:"blocks"` 15 16 Commit util.LexLink `json:"commit" cborgen:"commit"` 16 17 Ops []*SyncSubscribeRepos_RepoOp `json:"ops" cborgen:"ops"` 17 18 Prev *util.LexLink `json:"prev" cborgen:"prev"`
+18 -8
bgs/bgs.go
··· 357 357 358 358 if err := bgs.repoman.HandleExternalUserEvent(ctx, host.ID, u.ID, u.Did, (*cid.Cid)(evt.Prev), evt.Blocks); err != nil { 359 359 log.Warnw("failed handling event", "err", err, "host", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String()) 360 - if !errors.Is(err, carstore.ErrRepoBaseMismatch) { 361 - return fmt.Errorf("handle user event failed: %w", err) 362 - } 360 + 361 + if errors.Is(err, carstore.ErrRepoBaseMismatch) { 362 + ai, err := bgs.Index.LookupUser(ctx, u.ID) 363 + if err != nil { 364 + return err 365 + } 363 366 364 - ai, err := bgs.Index.LookupUser(ctx, u.ID) 365 - if err != nil { 366 - return err 367 + span.SetAttributes(attribute.Bool("catchup_queue", true)) 368 + 369 + return bgs.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt) 367 370 } 368 371 369 - span.SetAttributes(attribute.Bool("catchup_queue", true)) 372 + if errors.Is(err, carstore.ErrRepoFork) { 373 + log.Errorw("detected repo fork", "from", stringLink(evt.Prev), "host", host.Host, "repo", u.Did) 374 + 375 + span.SetAttributes(attribute.Bool("catchup_queue", true)) 376 + span.SetAttributes(attribute.Bool("fork", true)) 370 377 371 - return bgs.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt) 378 + return fmt.Errorf("cannot process repo fork") 379 + } 380 + 381 + return fmt.Errorf("handle user event failed: %w", err) 372 382 } 373 383 374 384 // sync blobs
+34
carstore/bs.go
··· 278 278 279 279 var ErrRepoBaseMismatch = fmt.Errorf("attempted a delta session on top of the wrong previous head") 280 280 281 + var ErrRepoFork = fmt.Errorf("repo fork detected") 282 + 281 283 func (cs *CarStore) NewDeltaSession(ctx context.Context, user util.Uid, prev *cid.Cid) (*DeltaSession, error) { 282 284 ctx, span := otel.Tracer("carstore").Start(ctx, "NewSession") 283 285 defer span.End() ··· 291 293 292 294 if prev != nil { 293 295 if lastShard.Root.CID != *prev { 296 + fork, err := cs.checkFork(ctx, user, *prev) 297 + if err != nil { 298 + return nil, fmt.Errorf("failed to check carstore base mismatch for fork condition: %w", err) 299 + } 300 + 301 + if fork { 302 + return nil, fmt.Errorf("fork at %s: %w", prev.String(), ErrRepoFork) 303 + } 304 + 294 305 return nil, fmt.Errorf("mismatch: %s != %s: %w", lastShard.Root.CID, prev.String(), ErrRepoBaseMismatch) 295 306 } 296 307 } ··· 678 689 679 690 return out, nil 680 691 } 692 + 693 + func (cs *CarStore) checkFork(ctx context.Context, user util.Uid, prev cid.Cid) (bool, error) { 694 + lastShard, err := cs.getLastShard(ctx, user) 695 + if err != nil { 696 + return false, err 697 + } 698 + 699 + var maybeShard CarShard 700 + if err := cs.meta.WithContext(ctx).Model(CarShard{}).Find(&maybeShard, "usr = ? AND root = ?", user, &util.DbCID{prev}).Error; err != nil { 701 + return false, err 702 + } 703 + 704 + if maybeShard.ID == lastShard.ID { 705 + // somehow we are checking if a valid 'append' is a fork, seems buggy, throw an error 706 + return false, fmt.Errorf("invariant broken: checked for forkiness of a valid append") 707 + } 708 + 709 + if maybeShard.ID == 0 { 710 + return false, nil 711 + } 712 + 713 + return true, nil 714 + }
+30 -19
cmd/gosky/debug.go
··· 11 11 12 12 comatproto "github.com/bluesky-social/indigo/api/atproto" 13 13 "github.com/bluesky-social/indigo/events" 14 + "github.com/bluesky-social/indigo/lex/util" 14 15 "github.com/bluesky-social/indigo/repo" 15 16 "github.com/bluesky-social/indigo/repomgr" 16 17 ··· 143 144 LastSeq int64 144 145 } 145 146 147 + func cidStr(c *util.LexLink) string { 148 + if c == nil { 149 + return "<nil>" 150 + } 151 + 152 + return c.String() 153 + } 154 + 146 155 var debugStreamCmd = &cli.Command{ 147 156 Name: "debug-stream", 148 157 Flags: []cli.Flag{ ··· 177 186 178 187 fmt.Printf("\rChecking seq: %d ", evt.Seq) 179 188 180 - r, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(evt.Blocks)) 181 - if err != nil { 182 - fmt.Printf("\nEvent at sequence %d had an invalid repo slice: %s\n", evt.Seq, err) 183 - return nil 184 - } else { 185 - prev, err := r.PrevCommit(ctx) 189 + if !evt.TooBig { 190 + r, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(evt.Blocks)) 186 191 if err != nil { 187 - return err 188 - } 192 + fmt.Printf("\nEvent at sequence %d had an invalid repo slice: %s\n", evt.Seq, err) 193 + return nil 194 + } else { 195 + prev, err := r.PrevCommit(ctx) 196 + if err != nil { 197 + return err 198 + } 189 199 190 - var cs, es string 191 - if prev != nil { 192 - cs = prev.String() 193 - } 200 + var cs, es string 201 + if prev != nil { 202 + cs = prev.String() 203 + } 194 204 195 - if evt.Prev != nil { 196 - es = evt.Prev.String() 197 - } 205 + if evt.Prev != nil { 206 + es = evt.Prev.String() 207 + } 198 208 199 - if cs != es { 200 - fmt.Printf("\nEvent at sequence %d has mismatch between slice prev and struct prev: %s != %s\n", evt.Seq, prev, evt.Prev) 209 + if cs != es { 210 + fmt.Printf("\nEvent at sequence %d has mismatch between slice prev and struct prev: %s != %s\n", evt.Seq, prev, evt.Prev) 211 + } 201 212 } 202 213 } 203 214 204 215 cur, ok := infos[evt.Repo] 205 216 if ok { 206 - if cur.LastCid.String() != evt.Prev.String() { 217 + if cur.LastCid.String() != cidStr(evt.Prev) { 207 218 fmt.Println() 208 - fmt.Printf("Event at sequence %d, repo=%s had prev=%s head=%s, but last commit we saw was %s (seq=%d)\n", evt.Seq, evt.Repo, evt.Prev.String(), evt.Commit.String(), evt.Commit.String(), cur.LastSeq) 219 + fmt.Printf("Event at sequence %d, repo=%s had prev=%s head=%s, but last commit we saw was %s (seq=%d)\n", evt.Seq, evt.Repo, cidStr(evt.Prev), evt.Commit.String(), cur.LastCid, cur.LastSeq) 209 220 } 210 221 } 211 222
+159
cmd/gosky/streamdiff.go
··· 1 + package main 2 + 3 + import ( 4 + "context" 5 + "fmt" 6 + "net/http" 7 + 8 + comatproto "github.com/bluesky-social/indigo/api/atproto" 9 + "github.com/bluesky-social/indigo/events" 10 + "github.com/gorilla/websocket" 11 + cli "github.com/urfave/cli/v2" 12 + ) 13 + 14 + // TODO: WIP - turns out to be more complicated than i initially thought 15 + var streamCompareCmd = &cli.Command{ 16 + Name: "diff-stream", 17 + Flags: []cli.Flag{}, 18 + Action: func(cctx *cli.Context) error { 19 + d := websocket.DefaultDialer 20 + 21 + hosta := cctx.Args().Get(0) 22 + hostb := cctx.Args().Get(1) 23 + 24 + cona, _, err := d.Dial(fmt.Sprintf("%s/xrpc/com.atproto.sync.subscribeRepos", hosta), http.Header{}) 25 + if err != nil { 26 + return fmt.Errorf("dial failure: %w", err) 27 + } 28 + 29 + conb, _, err := d.Dial(fmt.Sprintf("%s/xrpc/com.atproto.sync.subscribeRepos", hostb), http.Header{}) 30 + if err != nil { 31 + return fmt.Errorf("dial failure: %w", err) 32 + } 33 + 34 + sd := &streamDiffer{} 35 + ctx, cancel := context.WithCancel(context.Background()) 36 + defer cancel() 37 + 38 + go func() { 39 + err = events.HandleRepoStream(ctx, cona, &events.RepoStreamCallbacks{ 40 + RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error { 41 + sd.PushA(&events.XRPCStreamEvent{ 42 + RepoCommit: evt, 43 + }) 44 + return nil 45 + }, 46 + RepoInfo: func(evt *comatproto.SyncSubscribeRepos_Info) error { 47 + return nil 48 + }, 49 + // TODO: all the other Repo* event types 50 + Error: func(evt *events.ErrorFrame) error { 51 + return fmt.Errorf("%s: %s", evt.Error, evt.Message) 52 + }, 53 + }) 54 + if err != nil { 55 + log.Errorf("stream A failed: %s", err) 56 + } 57 + }() 58 + 59 + go func() { 60 + err = events.HandleRepoStream(ctx, conb, &events.RepoStreamCallbacks{ 61 + RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error { 62 + sd.PushB(&events.XRPCStreamEvent{ 63 + RepoCommit: evt, 64 + }) 65 + return nil 66 + }, 67 + RepoInfo: func(evt *comatproto.SyncSubscribeRepos_Info) error { 68 + return nil 69 + }, 70 + // TODO: all the other Repo* event types 71 + Error: func(evt *events.ErrorFrame) error { 72 + return fmt.Errorf("%s: %s", evt.Error, evt.Message) 73 + }, 74 + }) 75 + if err != nil { 76 + log.Errorf("stream A failed: %s", err) 77 + } 78 + }() 79 + 80 + select {} 81 + }, 82 + } 83 + 84 + type streamDiffer struct { 85 + Aevts []*events.XRPCStreamEvent 86 + Bevts []*events.XRPCStreamEvent 87 + } 88 + 89 + func (sd *streamDiffer) PushA(evt *events.XRPCStreamEvent) { 90 + ix := findEvt(evt, sd.Bevts) 91 + if ix < 0 { 92 + sd.Aevts = append(sd.Aevts, evt) 93 + return 94 + } 95 + 96 + switch evtOp(evt) { 97 + case "#commit": 98 + e := evt.RepoCommit 99 + oe := sd.Bevts[ix].RepoCommit 100 + 101 + if len(e.Blocks) != len(oe.Blocks) { 102 + fmt.Printf("seq %d (A) and seq %d (B) have different carslice lengths: %d != %d", e.Seq, oe.Seq, len(e.Blocks), len(oe.Blocks)) 103 + } 104 + default: 105 + } 106 + 107 + } 108 + 109 + func (sd *streamDiffer) PushB(evt *events.XRPCStreamEvent) { 110 + 111 + } 112 + 113 + func evtOp(evt *events.XRPCStreamEvent) string { 114 + switch { 115 + case evt.Error != nil: 116 + return "ERROR" 117 + case evt.RepoCommit != nil: 118 + return "#commit" 119 + case evt.RepoHandle != nil: 120 + return "#handle" 121 + case evt.RepoInfo != nil: 122 + return "#info" 123 + case evt.RepoMigrate != nil: 124 + return "#migrate" 125 + case evt.RepoTombstone != nil: 126 + return "#tombstone" 127 + default: 128 + return "unknown" 129 + } 130 + } 131 + 132 + func sameCommit(a, b *comatproto.SyncSubscribeRepos_Commit) bool { 133 + return a.Repo == b.Repo && cidStr(a.Prev) == cidStr(b.Prev) 134 + } 135 + 136 + func findEvt(evt *events.XRPCStreamEvent, list []*events.XRPCStreamEvent) int { 137 + evtop := evtOp(evt) 138 + 139 + for i, oe := range list { 140 + if evtop != evtOp(oe) { 141 + continue 142 + } 143 + 144 + switch { 145 + case evt.RepoCommit != nil: 146 + if sameCommit(evt.RepoCommit, oe.RepoCommit) { 147 + return i 148 + } 149 + case evt.RepoHandle != nil: 150 + panic("not handling handle updates yet") 151 + case evt.RepoMigrate != nil: 152 + panic("not handling repo migrates yet") 153 + default: 154 + panic("unhandled event type: " + evtop) 155 + } 156 + } 157 + 158 + return -1 159 + }
+3 -2
indexer/indexer.go
··· 84 84 ctx, span := otel.Tracer("indexer").Start(ctx, "HandleRepoEvent") 85 85 defer span.End() 86 86 87 - log.Infow("Handling Repo Event!", "uid", evt.User) 87 + log.Debugw("Handling Repo Event!", "uid", evt.User) 88 88 89 89 var outops []*comatproto.SyncSubscribeRepos_RepoOp 90 90 for _, op := range evt.Ops { ··· 113 113 114 114 } 115 115 116 - log.Infow("Sending event", "did", did) 116 + log.Debugw("Sending event", "did", did) 117 117 if err := ix.events.AddEvent(ctx, &events.XRPCStreamEvent{ 118 118 RepoCommit: &comatproto.SyncSubscribeRepos_Commit{ 119 119 Repo: did, ··· 867 867 } 868 868 869 869 // TODO: max size on these? A malicious PDS could just send us a petabyte sized repo here and kill us 870 + log.Infow("SyncGetRepo", "did", ai.Did, "user", ai.Handle, "from", from) 870 871 repo, err := comatproto.SyncGetRepo(ctx, c, ai.Did, from, "") 871 872 if err != nil { 872 873 return fmt.Errorf("failed to fetch repo: %w", err)