Like malachite (atproto-lastfm-importer), but in Go and bluer.
go spotify tealfm lastfm atproto
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

sync/record: cache normalizations

karitham d9d69980 e90f6de3

+57 -39
+1 -1
flake.nix
··· 19 19 name = "lazuli"; 20 20 version = "0.1.7"; 21 21 src = pkgs.nix-gitignore.gitignoreSource [ "*.csv" "*.zip" "*.json" ] ./.; 22 - vendorHash = "sha256-O6R8jC8Ms5gsY2FUmuL8lTGTODfMW1CsSWuWbN27zeY="; 22 + vendorHash = "sha256-KnWoZ5UK8eigYw5uMSsLu4DIhzkSXmVHaE51Mr6hFmA="; 23 23 ldflags = [ 24 24 "-X" 25 25 "main.Version=${version}"
+1 -1
go.mod
··· 7 7 github.com/failsafe-go/failsafe-go v0.9.5 8 8 github.com/urfave/cli/v3 v3.6.2 9 9 go.etcd.io/bbolt v1.4.3 10 - golang.org/x/text v0.33.0 11 10 ) 12 11 13 12 require ( ··· 26 25 gitlab.com/yawning/tuplehash v0.0.0-20230713102510-df83abbf9a02 // indirect 27 26 go.yaml.in/yaml/v2 v2.4.3 // indirect 28 27 golang.org/x/crypto v0.47.0 // indirect 28 + golang.org/x/sync v0.19.0 // indirect 29 29 golang.org/x/sys v0.40.0 // indirect 30 30 golang.org/x/time v0.14.0 // indirect 31 31 google.golang.org/protobuf v1.36.11 // indirect
+22 -37
sync/record.go
··· 7 7 "unicode" 8 8 9 9 "github.com/bluesky-social/indigo/atproto/syntax" 10 - "golang.org/x/text/unicode/norm" 11 10 ) 12 11 13 12 type ExistingRecord struct { ··· 24 23 result.Grow(len(s)) 25 24 26 25 for _, r := range s { 27 - if unicode.Is(unicode.Mn, r) { 28 - continue 29 - } 30 - decomposed := norm.NFKD.String(string(r)) 31 - if len(decomposed) > 0 { 32 - result.WriteByte(decomposed[0]) 33 - } else { 26 + if r >= 128 || unicode.IsLetter(r) || unicode.IsNumber(r) { 34 27 result.WriteRune(r) 35 28 } 36 29 } 37 30 38 - s = result.String() 39 - s = strings.Map(func(r rune) rune { 40 - if r >= 128 { 41 - return r 42 - } 43 - if unicode.IsLetter(r) || unicode.IsNumber(r) { 44 - return r 45 - } 46 - return -1 47 - }, s) 48 - 49 - return s 31 + return result.String() 50 32 } 51 33 52 34 type PlayRecord struct { ··· 61 43 RecordingMbId string `json:"recordingMbId,omitempty"` 62 44 OriginUrl string `json:"originUrl"` 63 45 MsPlayed int `json:"msPlayed,omitempty"` 46 + 47 + normalizedTrack string `json:"-"` 48 + normalizedArtist string `json:"-"` 64 49 } 65 50 66 51 func (r PlayRecord) ArtistName() string { ··· 71 56 } 72 57 73 58 func (r PlayRecord) normalizeArtist() string { 74 - return normalizeString(r.ArtistName()) 59 + if r.normalizedArtist != "" { 60 + return r.normalizedArtist 61 + } 62 + 63 + r.normalizedArtist = normalizeString(r.ArtistName()) 64 + 65 + return r.normalizedArtist 75 66 } 76 67 77 68 func (r PlayRecord) normalizeTrack() string { 78 - return normalizeString(r.TrackName) 69 + if r.normalizedTrack != "" { 70 + return r.normalizedTrack 71 + } 72 + 73 + r.normalizedTrack = normalizeString(r.TrackName) 74 + 75 + return r.normalizedTrack 79 76 } 80 77 81 78 func (r PlayRecord) hasMBID() bool { ··· 84 81 return true 85 82 } 86 83 } 84 + 87 85 return r.RecordingMbId != "" 88 86 } 89 87 ··· 92 90 } 93 91 94 92 func (r PlayRecord) betterThan(other PlayRecord) bool { 95 - if r.isLastFM() && !other.isLastFM() { 96 - return true 97 - } 98 - if !r.isLastFM() 
&& other.isLastFM() { 99 - return false 100 - } 101 - // Both same source, prefer the one with MBID 102 - if r.hasMBID() && !other.hasMBID() { 103 - return true 104 - } 105 - return false 93 + return (r.hasMBID() && !other.hasMBID()) || (r.isLastFM() && !other.isLastFM()) 106 94 } 107 95 108 96 func (r PlayRecord) IsDuplicate(other PlayRecord, tolerance time.Duration) (bool, bool) { ··· 118 106 } 119 107 120 108 diff := r.PlayedTime.Sub(other.PlayedTime.Time) 121 - if diff < 0 { 122 - diff = -diff 123 - } 124 - return diff <= tolerance 109 + return max(diff, -diff) <= tolerance 125 110 } 126 111 127 112 func (r PlayRecord) Time() time.Time {
+33
sync/record_test.go
··· 1 1 package sync 2 2 3 3 import ( 4 + "fmt" 4 5 "testing" 5 6 "time" 6 7 ··· 438 439 }) 439 440 } 440 441 } 442 + 443 + func BenchmarkMergeRecords(b *testing.B) { 444 + // Generate test data with multiple sources and items 445 + numSources := 10 446 + itemsPerSource := 1000 447 + tolerance := 10 * time.Minute 448 + baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 449 + 450 + sources := make([][]PlayRecord, numSources) 451 + for i := range numSources { 452 + sources[i] = make([]PlayRecord, itemsPerSource) 453 + for j := range itemsPerSource { 454 + sources[i][j] = PlayRecord{ 455 + Type: "app.bsky.feed.post", 456 + TrackName: fmt.Sprintf("Song %d", (i+j)%100), 457 + Artists: []PlayRecordArtist{{ArtistName: fmt.Sprintf("Artist %d", i%20)}}, 458 + PlayedTime: Timestamp{Time: baseTime.Add(time.Duration(i*itemsPerSource+j) * time.Minute)}, 459 + SubmissionClientAgent: DefaultClientAgent, 460 + MusicServiceBaseDomain: []string{MusicServiceLastFM, MusicServiceSpotify}[i%2], 461 + OriginUrl: "https://example.com", 462 + MsPlayed: 180000, 463 + } 464 + if (i+j)%3 == 0 { 465 + sources[i][j].RecordingMbId = "mbid-123" 466 + } 467 + } 468 + } 469 + 470 + for b.Loop() { 471 + kway.Merge(sources, tolerance) 472 + } 473 + }