like malachite (atproto-lastfm-importer) but in go and bluer
go spotify tealfm lastfm atproto
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

refactor merging into proper kway minheap merge

karitham 9f6aa587 3cc2546c

+429 -744
+19 -19
README.md
··· 21 21 22 22 ### Commands 23 23 24 - | Command | Usage | 25 - | :--- | :--- | 24 + | Command | Usage | 25 + | :------- | :------------------------------------------------------- | 26 26 | `export` | Parse and merge Last.fm/Spotify exports into a JSON file | 27 - | `import` | Import new records to Bluesky (auto-skips existing) | 28 - | `sync` | Refresh the local cache with records from Bluesky | 29 - | `stats` | Show database status and daily rate limit consumption | 30 - | `failed` | List records that failed to import | 31 - | `retry` | Attempt to re-import failed records | 32 - | `dedupe` | Remove duplicate records from your Bluesky profile | 33 - | `debug` | Dump raw records from Bluesky for troubleshooting | 27 + | `import` | Import new records to Bluesky (auto-skips existing) | 28 + | `sync` | Refresh the local cache with records from Bluesky | 29 + | `stats` | Show database status and daily rate limit consumption | 30 + | `failed` | List records that failed to import | 31 + | `retry` | Attempt to re-import failed records | 32 + | `dedupe` | Remove duplicate records from your Bluesky profile | 33 + | `debug` | Dump raw records from Bluesky for troubleshooting | 34 34 35 35 ### Advanced Options 36 36 ··· 41 41 42 42 ## Environment Variables 43 43 44 - | Variable | Description | 45 - | ----------------- | -------------------------------------------- | 46 - | `LAZULI_HANDLE` | Bluesky handle (e.g., `user.bsky.social`) | 47 - | `LAZULI_PASSWORD` | Bluesky app password | 48 - | `LAZULI_LASTFM` | Path to Last.fm CSV file | 49 - | `LAZULI_SPOTIFY` | Path to Spotify JSON file/directory/zip | 50 - | `LAZULI_MODE` | Import mode: `lastfm`, `spotify`, `combined` | 51 - | `LAZULI_DRY_RUN` | Preview without publishing | 52 - | `LAZULI_VERBOSE` | Enable verbose logging | 53 - | `LAZULI_REVERSE` | Process records in reverse order | 44 + | Variable | Description | 45 + | ----------------- | ----------------------------------------- | 46 + | `LAZULI_HANDLE` | Bluesky handle 
(e.g., `user.bsky.social`) | 47 + | `LAZULI_PASSWORD` | Bluesky app password | 48 + | `LAZULI_LASTFM` | Path to Last.fm CSV file | 49 + | `LAZULI_SPOTIFY` | Path to Spotify JSON file/directory/zip | 50 + | `LAZULI_DRY_RUN` | Preview without publishing | 51 + | `LAZULI_VERBOSE` | Enable verbose logging | 52 + | `LAZULI_REVERSE` | Process records in reverse order | 54 53 55 54 ## Input Formats 56 55 ··· 71 70 The recommended way to use Spotify data is by passing the **ZIP archive** you receive from Spotify directly. Lazuli will automatically find and parse all streaming history files within it. 72 71 73 72 Lazuli accepts: 73 + 74 74 - **ZIP archives** containing extended history (Recommended) 75 75 - Directories containing `Streaming_History_Audio_*.json` files 76 76 - Single `Streaming_History_Audio_*.json` files
+96
kway/merge.go
··· 1 + package kway 2 + 3 + import ( 4 + "container/heap" 5 + "time" 6 + ) 7 + 8 + // Mergeable defines the interface that types must implement to be used with the generic merge function. 9 + type Mergeable[T any] interface { 10 + Time() time.Time 11 + IsDuplicate(other T, tol time.Duration) (isMatch bool, preferThis bool) 12 + } 13 + 14 + // heapItem represents an item in the merge heap, tracking which source it came from 15 + // and its position within that source. 16 + type heapItem[T Mergeable[T]] struct { 17 + Value T 18 + SourceIdx int 19 + ElementIdx int 20 + } 21 + 22 + // mergeHeap implements a min-heap of mergeable items ordered by their Time method. 23 + type mergeHeap[T Mergeable[T]] []heapItem[T] 24 + 25 + func (h mergeHeap[T]) Len() int { return len(h) } 26 + func (h mergeHeap[T]) Swap(i, j int) { h[i], h[j] = h[j], h[i] } 27 + func (h mergeHeap[T]) Less(i, j int) bool { return h[i].Value.Time().Before(h[j].Value.Time()) } 28 + func (h *mergeHeap[T]) Push(x any) { *h = append(*h, x.(heapItem[T])) } 29 + func (h *mergeHeap[T]) Pop() any { 30 + old := *h 31 + n := len(old) 32 + item := old[n-1] 33 + *h = old[0 : n-1] 34 + return item 35 + } 36 + 37 + // Merge performs a k-way merge of multiple sorted slices of mergeable items. 38 + // It combines the sources while removing duplicates within the specified tolerance. 39 + // The result is ordered by the Time method of the items. 
40 + func Merge[T Mergeable[T]](sources [][]T, tolerance time.Duration) []T { 41 + h := &mergeHeap[T]{} 42 + heap.Init(h) 43 + 44 + // Initialize heap with first item from each source 45 + for i, src := range sources { 46 + if len(src) > 0 { 47 + heap.Push(h, heapItem[T]{Value: src[0], SourceIdx: i, ElementIdx: 0}) 48 + } 49 + } 50 + 51 + result := make([]T, 0) 52 + window := make([]T, 0) 53 + 54 + // Process items from the heap 55 + for h.Len() > 0 { 56 + curr := heap.Pop(h).(heapItem[T]) 57 + 58 + // Push the next item from the same source 59 + if curr.ElementIdx+1 < len(sources[curr.SourceIdx]) { 60 + heap.Push(h, heapItem[T]{ 61 + Value: sources[curr.SourceIdx][curr.ElementIdx+1], 62 + SourceIdx: curr.SourceIdx, 63 + ElementIdx: curr.ElementIdx + 1, 64 + }) 65 + } 66 + 67 + currItem := curr.Value 68 + 69 + // Evict items from window that are now older than tolerance relative to currItem 70 + for len(window) > 0 && currItem.Time().Sub(window[0].Time()) > tolerance { 71 + result = append(result, window[0]) 72 + window = window[1:] 73 + } 74 + 75 + // Check for duplicates in window 76 + found := false 77 + for i, existing := range window { 78 + if isMatch, preferCurr := currItem.IsDuplicate(existing, tolerance); isMatch { 79 + if preferCurr { 80 + window[i] = currItem 81 + } 82 + 83 + found = true 84 + break 85 + } 86 + } 87 + 88 + if !found { 89 + window = append(window, currItem) 90 + } 91 + } 92 + 93 + // Flush remaining window 94 + result = append(result, window...) 95 + return result 96 + }
+122
kway/merge_test.go
··· 1 + package kway 2 + 3 + import ( 4 + "testing" 5 + "time" 6 + ) 7 + 8 + // TestPlayRecord is a simple implementation of Mergeable for testing 9 + type TestPlayRecord struct { 10 + TrackName string 11 + Artist string 12 + time time.Time 13 + Source string // "lastfm" or "spotify" 14 + HasMBID bool 15 + } 16 + 17 + func (r TestPlayRecord) IsDuplicate(other TestPlayRecord, tolerance time.Duration) (bool, bool) { 18 + return r.SameAs(other, tolerance), r.BetterThan(other) 19 + } 20 + 21 + func (r TestPlayRecord) SameAs(other TestPlayRecord, tolerance time.Duration) bool { 22 + if r.TrackName != other.TrackName { 23 + return false 24 + } 25 + if r.Artist != other.Artist { 26 + return false 27 + } 28 + 29 + diff := r.time.Sub(other.time) 30 + if diff < 0 { 31 + diff = -diff 32 + } 33 + return diff <= tolerance 34 + } 35 + 36 + func (r TestPlayRecord) BetterThan(other TestPlayRecord) bool { 37 + if r.Source == "lastfm" && other.Source != "lastfm" { 38 + return true 39 + } 40 + if r.Source != "lastfm" && other.Source == "lastfm" { 41 + return false 42 + } 43 + return r.HasMBID && !other.HasMBID 44 + } 45 + 46 + func (r TestPlayRecord) Time() time.Time { 47 + return r.time 48 + } 49 + 50 + func TestMerge(t *testing.T) { 51 + baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 52 + 53 + // Test case where duplicates are very close in time and should be merged 54 + lastfmRecords := []TestPlayRecord{ 55 + {TrackName: "Song A", Artist: "Artist", time: baseTime, Source: "lastfm"}, 56 + {TrackName: "Song D", Artist: "Artist", time: baseTime.Add(3 * time.Hour), Source: "lastfm"}, 57 + } 58 + 59 + spotifyRecords := []TestPlayRecord{ 60 + {TrackName: "Song A", Artist: "Artist", time: baseTime.Add(3 * time.Minute), Source: "spotify"}, // Duplicate within tolerance 61 + {TrackName: "Song B", Artist: "Artist", time: baseTime.Add(time.Hour), Source: "spotify"}, 62 + {TrackName: "Song C", Artist: "Artist", time: baseTime.Add(2 * time.Hour), Source: "spotify"}, 63 + } 64 + 65 + 
result := Merge([][]TestPlayRecord{lastfmRecords, spotifyRecords}, 10*time.Minute) 66 + 67 + if len(result) != 4 { 68 + t.Errorf("Expected 4 results, got %d", len(result)) 69 + } 70 + 71 + // Check order - should be sorted by time 72 + expectedOrder := []string{"Song A", "Song B", "Song C", "Song D"} 73 + for i, expected := range expectedOrder { 74 + if i >= len(result) { 75 + t.Errorf("Missing result at position %d", i) 76 + break 77 + } 78 + if result[i].TrackName != expected { 79 + t.Errorf("Result %d should be %s, got %s", i, expected, result[i].TrackName) 80 + } 81 + } 82 + 83 + // Check that LastFM version is preferred for duplicate Song A 84 + if result[0].Source != "lastfm" { 85 + t.Errorf("Duplicate Song A should be from lastfm, got %s", result[0].Source) 86 + } 87 + } 88 + 89 + func TestMergeExactDuplicate(t *testing.T) { 90 + baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 91 + 92 + lastfmRecords := []TestPlayRecord{ 93 + {TrackName: "Song A", Artist: "Artist", time: baseTime, Source: "lastfm"}, 94 + } 95 + 96 + spotifyRecords := []TestPlayRecord{ 97 + {TrackName: "Song A", Artist: "Artist", time: baseTime, Source: "spotify"}, // Exact duplicate 98 + } 99 + 100 + result := Merge([][]TestPlayRecord{lastfmRecords, spotifyRecords}, 0) 101 + 102 + if len(result) != 1 { 103 + t.Errorf("Expected 1 result, got %d", len(result)) 104 + } 105 + 106 + // Check that LastFM version is preferred 107 + if result[0].Source != "lastfm" { 108 + t.Errorf("Duplicate should be from lastfm, got %s", result[0].Source) 109 + } 110 + } 111 + 112 + func TestMergeEmptySources(t *testing.T) { 113 + result := Merge([][]TestPlayRecord{}, 0) 114 + if len(result) != 0 { 115 + t.Errorf("Expected empty result, got %d items", len(result)) 116 + } 117 + 118 + result = Merge([][]TestPlayRecord{{}, {}}, 0) 119 + if len(result) != 0 { 120 + t.Errorf("Expected empty result from empty sources, got %d items", len(result)) 121 + } 122 + }
+8 -22
main.go
··· 12 12 "time" 13 13 14 14 "tangled.org/karitham.dev/lazuli/cache" 15 + "tangled.org/karitham.dev/lazuli/kway" 15 16 "tangled.org/karitham.dev/lazuli/sources/lastfm" 16 17 "tangled.org/karitham.dev/lazuli/sources/spotify" 17 18 "tangled.org/karitham.dev/lazuli/sync" ··· 486 487 } 487 488 a.log.Info("Loaded Spotify records", slog.Int("count", len(spotifyRecords))) 488 489 489 - mergedRecords, stats := sync.MergeRecords(lastfmRecords, spotifyRecords, tolerance) 490 - a.log.Info("Merged records", slog.Int("merged_total", stats.MergedTotal), slog.Int("duplicates_removed", stats.DuplicatesRemoved)) 490 + mergedRecords := kway.Merge([][]sync.PlayRecord{lastfmRecords, spotifyRecords}, tolerance) 491 + 492 + a.log.Info( 493 + "Merged records", 494 + slog.Int("merged_total", len(mergedRecords)), 495 + slog.Int("duplicates_removed", len(lastfmRecords)+len(spotifyRecords)-len(mergedRecords)), 496 + ) 491 497 492 498 if reverse { 493 499 slices.Reverse(mergedRecords) ··· 506 512 507 513 lastfmPath := cmd.String("lastfm") 508 514 spotifyPath := cmd.String("spotify") 509 - modeStr := cmd.String("mode") 510 515 dryRun := cmd.Bool("dry-run") 511 516 reverse := cmd.Bool("reverse") 512 517 fresh := cmd.Bool("fresh") ··· 522 527 } 523 528 } 524 529 525 - var mode sync.ImportMode 526 - switch modeStr { 527 - case "lastfm": 528 - mode = sync.ImportModeLastFM 529 - case "spotify": 530 - mode = sync.ImportModeSpotify 531 - case "combined": 532 - mode = sync.ImportModeCombined 533 - default: 534 - return fmt.Errorf("invalid mode: %s (must be lastfm, spotify, or combined)", modeStr) 535 - } 536 - 537 530 records, totalCount, err := sync.LoadRecordsForImport(ctx, sync.ImportOptions{ 538 531 LastFMPath: lastfmPath, 539 532 SpotifyPath: spotifyPath, 540 - Mode: mode, 541 533 Tolerance: tolerance, 542 534 LastFMParser: lastfm.Parser{}, 543 535 SpotifyParser: spotify.Parser{}, ··· 896 888 var importFlags = []cli.Flag{ 897 889 lastfmFlag, 898 890 spotifyFlag, 899 - &cli.StringFlag{ 900 - Name: 
"mode", 901 - Usage: "Import mode: lastfm, spotify, combined (default: combined)", 902 - Value: "combined", 903 - Sources: cli.EnvVars("LAZULI_MODE"), 904 - }, 905 891 &cli.BoolFlag{ 906 892 Name: "dry-run", 907 893 Usage: "Preview without publishing",
-11
sync/config.go
··· 18 18 19 19 var BaseRetryDelay = 2 * time.Second 20 20 21 - type ImportMode string 22 - 23 - const ( 24 - ImportModeLastFM ImportMode = "lastfm" 25 - ImportModeSpotify ImportMode = "spotify" 26 - ImportModeCombined ImportMode = "combined" 27 - ImportModeSync ImportMode = "sync" 28 - ) 29 - 30 21 type Config struct { 31 22 RecordType string `json:"recordType"` 32 23 ClientAgent string `json:"clientAgent"` ··· 35 26 CacheTTL time.Duration `json:"cacheTTL"` 36 27 CacheVersion int `json:"cacheVersion"` 37 28 SlingshotResolverURL string `json:"slingshotResolverURL"` 38 - ImportMode ImportMode `json:"importMode"` 39 29 UserAgent string `json:"userAgent"` 40 30 } 41 31 ··· 47 37 CacheTTL: CacheTTL, 48 38 CacheVersion: CacheVersion, 49 39 SlingshotResolverURL: SlingshotResolverURL, 50 - ImportMode: ImportModeLastFM, 51 40 } 52 41 53 42 type PublishResult struct {
+5 -12
sync/import.go
··· 9 9 "os" 10 10 "strings" 11 11 "time" 12 + 13 + "tangled.org/karitham.dev/lazuli/kway" 12 14 ) 13 15 14 16 type Parser interface { ··· 50 52 type ImportOptions struct { 51 53 LastFMPath string 52 54 SpotifyPath string 53 - Mode ImportMode 54 55 Tolerance time.Duration 55 56 56 57 LastFMParser Parser ··· 61 62 var lastfmRecords, spotifyRecords []PlayRecord 62 63 var err error 63 64 64 - if opts.Mode == ImportModeLastFM || opts.Mode == ImportModeCombined { 65 + if opts.LastFMPath != "" { 65 66 lastfmRecords, err = ParseInput(ctx, opts.LastFMPath, opts.LastFMParser) 66 67 if err != nil { 67 68 return nil, 0, fmt.Errorf("parse lastfm: %w", err) 68 69 } 69 70 } 70 71 71 - if opts.Mode == ImportModeSpotify || opts.Mode == ImportModeCombined { 72 + if opts.SpotifyPath != "" { 72 73 spotifyRecords, err = ParseInput(ctx, opts.SpotifyPath, opts.SpotifyParser) 73 74 if err != nil { 74 75 return nil, 0, fmt.Errorf("parse spotify: %w", err) ··· 77 78 78 79 totalInput := len(lastfmRecords) + len(spotifyRecords) 79 80 80 - var mergedRecords []PlayRecord 81 - switch opts.Mode { 82 - case ImportModeCombined: 83 - mergedRecords, _ = MergeRecords(lastfmRecords, spotifyRecords, opts.Tolerance) 84 - case ImportModeLastFM: 85 - mergedRecords = lastfmRecords 86 - default: 87 - mergedRecords = spotifyRecords 88 - } 81 + mergedRecords := kway.Merge([][]PlayRecord{lastfmRecords, spotifyRecords}, opts.Tolerance) 89 82 90 83 return mergedRecords, totalInput, nil 91 84 }
-1
sync/import_test.go
··· 113 113 114 114 // 3. Load Records 115 115 opts := sync.ImportOptions{ 116 - Mode: sync.ImportModeCombined, 117 116 Tolerance: 10 * time.Second, 118 117 LastFMParser: &mockParser{records: []sync.PlayRecord{rec1}}, 119 118 SpotifyParser: &mockParser{records: []sync.PlayRecord{rec2, rec3}},
+30 -115
sync/record.go
··· 2 2 3 3 import ( 4 4 "fmt" 5 - "sort" 6 5 "strings" 7 6 "time" 8 7 "unicode" ··· 71 70 return "Unknown Artist" 72 71 } 73 72 74 - func (r PlayRecord) NormalizedArtist() string { 73 + func (r PlayRecord) normalizeArtist() string { 75 74 return normalizeString(r.ArtistName()) 76 75 } 77 76 78 - func (r PlayRecord) NormalizedTrack() string { 77 + func (r PlayRecord) normalizeTrack() string { 79 78 return normalizeString(r.TrackName) 80 79 } 81 80 82 - func (r PlayRecord) HasMBID() bool { 81 + func (r PlayRecord) hasMBID() bool { 83 82 for _, a := range r.Artists { 84 83 if a.ArtistMbId != "" { 85 84 return true ··· 88 87 return r.RecordingMbId != "" 89 88 } 90 89 91 - func (r PlayRecord) IsLastFM() bool { 90 + func (r PlayRecord) isLastFM() bool { 92 91 return r.MusicServiceBaseDomain == MusicServiceLastFM 93 92 } 94 93 95 94 func (r PlayRecord) BetterThan(other PlayRecord) bool { 96 - if r.IsLastFM() && !other.IsLastFM() { 95 + if r.isLastFM() && !other.isLastFM() { 97 96 return true 98 97 } 99 - if !r.IsLastFM() && other.IsLastFM() { 98 + if !r.isLastFM() && other.isLastFM() { 100 99 return false 101 100 } 102 101 // Both same source, prefer the one with MBID 103 - if r.HasMBID() && !other.HasMBID() { 102 + if r.hasMBID() && !other.hasMBID() { 104 103 return true 105 104 } 106 105 return false 107 106 } 108 107 108 + func (r PlayRecord) IsDuplicate(other PlayRecord, tolerance time.Duration) (bool, bool) { 109 + return r.sameAs(other, tolerance), r.BetterThan(other) 110 + } 111 + 112 + func (r PlayRecord) sameAs(other PlayRecord, tolerance time.Duration) bool { 113 + if r.normalizeTrack() != other.normalizeTrack() { 114 + return false 115 + } 116 + if r.normalizeArtist() != other.normalizeArtist() { 117 + return false 118 + } 119 + 120 + diff := r.PlayedTime.Sub(other.PlayedTime.Time) 121 + if diff < 0 { 122 + diff = -diff 123 + } 124 + return diff <= tolerance 125 + } 126 + 127 + func (r PlayRecord) Time() time.Time { 128 + return r.PlayedTime.Time 129 + } 130 + 
109 131 type PlayRecordArtist struct { 110 132 ArtistName string `json:"artistName"` 111 133 ArtistMbId string `json:"artistMbId,omitempty"` 112 - } 113 - 114 - type MergeStats struct { 115 - LastFMTotal int `json:"lastfmTotal"` 116 - SpotifyTotal int `json:"spotifyTotal"` 117 - DuplicatesRemoved int `json:"duplicatesRemoved"` 118 - LastFMUnique int `json:"lastfmUnique"` 119 - SpotifyUnique int `json:"spotifyUnique"` 120 - MergedTotal int `json:"mergedTotal"` 121 134 } 122 135 123 136 const ( ··· 149 162 keys[i] = string(tid) 150 163 } 151 164 return keys 152 - } 153 - 154 - func MergeRecords(lastfm, spotify []PlayRecord, tolerance time.Duration) ([]PlayRecord, MergeStats) { 155 - stats := MergeStats{ 156 - LastFMTotal: len(lastfm), 157 - SpotifyTotal: len(spotify), 158 - } 159 - 160 - // key is normalizedTrack|normalizedArtist|bucket 161 - recordsMap := make(map[string]PlayRecord) 162 - 163 - process := func(records []PlayRecord) { 164 - for _, rec := range records { 165 - track := rec.NormalizedTrack() 166 - artist := rec.NormalizedArtist() 167 - timestamp := rec.PlayedTime.Time 168 - 169 - found := false 170 - if tolerance > 0 { 171 - bucket := timestamp.Unix() / int64(tolerance.Seconds()) 172 - // Check current and adjacent buckets 173 - for b := bucket - 1; b <= bucket+1; b++ { 174 - key := fmt.Sprintf("%s|%s|%d", track, artist, b) 175 - if existing, ok := recordsMap[key]; ok { 176 - diff := timestamp.Sub(existing.PlayedTime.Time) 177 - if diff < 0 { 178 - diff = -diff 179 - } 180 - if diff <= tolerance { 181 - if rec.BetterThan(existing) { 182 - recordsMap[key] = rec 183 - } 184 - stats.DuplicatesRemoved++ 185 - found = true 186 - break 187 - } 188 - } 189 - } 190 - if !found { 191 - key := fmt.Sprintf("%s|%s|%d", track, artist, bucket) 192 - recordsMap[key] = rec 193 - if rec.IsLastFM() { 194 - stats.LastFMUnique++ 195 - } else { 196 - stats.SpotifyUnique++ 197 - } 198 - } 199 - } else { 200 - key := fmt.Sprintf("%s|%s|%s", track, artist, 
timestamp.Format(time.RFC3339)) 201 - if existing, ok := recordsMap[key]; ok { 202 - if rec.BetterThan(existing) { 203 - recordsMap[key] = rec 204 - } 205 - stats.DuplicatesRemoved++ 206 - } else { 207 - recordsMap[key] = rec 208 - if rec.IsLastFM() { 209 - stats.LastFMUnique++ 210 - } else { 211 - stats.SpotifyUnique++ 212 - } 213 - } 214 - } 215 - } 216 - } 217 - 218 - process(lastfm) 219 - process(spotify) 220 - 221 - result := make([]PlayRecord, 0, len(recordsMap)) 222 - for _, rec := range recordsMap { 223 - result = append(result, rec) 224 - } 225 - 226 - sort.Slice(result, func(i, j int) bool { 227 - if !result[i].PlayedTime.Equal(result[j].PlayedTime.Time) { 228 - return result[i].PlayedTime.Before(result[j].PlayedTime.Time) 229 - } 230 - return result[i].TrackName < result[j].TrackName 231 - }) 232 - 233 - stats.MergedTotal = len(result) 234 - return result, stats 235 - } 236 - 237 - func (r PlayRecord) SameAs(other PlayRecord, tolerance time.Duration) bool { 238 - if r.NormalizedTrack() != other.NormalizedTrack() { 239 - return false 240 - } 241 - if r.NormalizedArtist() != other.NormalizedArtist() { 242 - return false 243 - } 244 - 245 - diff := r.PlayedTime.Sub(other.PlayedTime.Time) 246 - if diff < 0 { 247 - diff = -diff 248 - } 249 - return diff <= tolerance 250 165 } 251 166 252 167 func FilterNew(records []PlayRecord, existing []ExistingRecord, processed map[string]bool) []PlayRecord {
+149 -564
sync/record_test.go
··· 3 3 import ( 4 4 "testing" 5 5 "time" 6 + 7 + "tangled.org/karitham.dev/lazuli/kway" 6 8 ) 7 9 8 10 func TestCreateRecordKey(t *testing.T) { ··· 183 185 } 184 186 } 185 187 186 - func TestMergeRecords(t *testing.T) { 188 + func TestMergeRecordsComprehensive(t *testing.T) { 187 189 baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 188 190 189 191 tests := []struct { 190 - name string 191 - lastfm []PlayRecord 192 - spotify []PlayRecord 193 - tolerance time.Duration 194 - expectedLen int 195 - expectedDuplicates int 196 - expectedLastFMUnique int 197 - expectedSpotifyUnique int 192 + name string 193 + lastfm []PlayRecord 194 + spotify []PlayRecord 195 + tolerance time.Duration 196 + expectedLen int 197 + expectedMergedTotal int 198 + expectedFirstTrack string 199 + expectedOrder []string // track names in expected order 198 200 }{ 199 201 { 200 - name: "empty input both", 201 - lastfm: []PlayRecord{}, 202 - spotify: []PlayRecord{}, 203 - tolerance: DefaultCrossSourceTolerance, 204 - expectedLen: 0, 202 + name: "both slices empty", 203 + lastfm: []PlayRecord{}, 204 + spotify: []PlayRecord{}, 205 + tolerance: 0, 206 + expectedLen: 0, 207 + expectedMergedTotal: 0, 205 208 }, 206 209 { 207 - name: "empty input lastfm only", 208 - lastfm: []PlayRecord{}, 209 - spotify: []PlayRecord{ 210 - { 211 - TrackName: "Song", 212 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 213 - PlayedTime: Timestamp{Time: baseTime}, 214 - MusicServiceBaseDomain: MusicServiceSpotify, 215 - }, 216 - }, 217 - tolerance: DefaultCrossSourceTolerance, 218 - expectedLen: 1, 219 - expectedLastFMUnique: 0, 220 - expectedSpotifyUnique: 1, 221 - }, 222 - { 223 - name: "empty input spotify only", 210 + name: "only lastfm records", 224 211 lastfm: []PlayRecord{ 225 - { 226 - TrackName: "Song", 227 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 228 - PlayedTime: Timestamp{Time: baseTime}, 229 - MusicServiceBaseDomain: MusicServiceLastFM, 230 - }, 212 + {TrackName: "Song A", 
Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 213 + {TrackName: "Song B", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Hour)}, MusicServiceBaseDomain: MusicServiceLastFM}, 231 214 }, 232 - spotify: []PlayRecord{}, 233 - tolerance: DefaultCrossSourceTolerance, 234 - expectedLen: 1, 235 - expectedLastFMUnique: 1, 236 - expectedSpotifyUnique: 0, 215 + spotify: []PlayRecord{}, 216 + tolerance: 0, 217 + expectedLen: 2, 218 + expectedMergedTotal: 2, 219 + expectedOrder: []string{"Song A", "Song B"}, 237 220 }, 238 221 { 239 - name: "same timestamp merged", 240 - lastfm: []PlayRecord{ 241 - { 242 - TrackName: "Song", 243 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 244 - PlayedTime: Timestamp{Time: baseTime}, 245 - MusicServiceBaseDomain: MusicServiceLastFM, 246 - }, 247 - }, 222 + name: "only spotify records", 223 + lastfm: []PlayRecord{}, 248 224 spotify: []PlayRecord{ 249 - { 250 - TrackName: "Song", 251 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 252 - PlayedTime: Timestamp{Time: baseTime}, 253 - MusicServiceBaseDomain: MusicServiceSpotify, 254 - }, 225 + {TrackName: "Song X", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceSpotify}, 226 + {TrackName: "Song Y", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Hour)}, MusicServiceBaseDomain: MusicServiceSpotify}, 255 227 }, 256 - tolerance: DefaultCrossSourceTolerance, 257 - expectedLen: 1, 258 - expectedDuplicates: 1, 259 - expectedLastFMUnique: 1, 260 - expectedSpotifyUnique: 0, 228 + tolerance: 0, 229 + expectedLen: 2, 230 + expectedMergedTotal: 2, 231 + expectedOrder: []string{"Song X", "Song Y"}, 261 232 }, 262 233 { 263 - name: "different songs not merged", 234 + name: "zero tolerance no duplicates", 264 235 lastfm: 
[]PlayRecord{ 265 - { 266 - TrackName: "Song A", 267 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 268 - PlayedTime: Timestamp{Time: baseTime}, 269 - MusicServiceBaseDomain: MusicServiceLastFM, 270 - }, 236 + {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 271 237 }, 272 238 spotify: []PlayRecord{ 273 - { 274 - TrackName: "Song B", 275 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 276 - PlayedTime: Timestamp{Time: baseTime}, 277 - MusicServiceBaseDomain: MusicServiceSpotify, 278 - }, 239 + {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 279 240 }, 280 - tolerance: DefaultCrossSourceTolerance, 281 - expectedLen: 2, 282 - expectedDuplicates: 0, 241 + tolerance: 0, 242 + expectedLen: 2, 243 + expectedMergedTotal: 2, 283 244 }, 284 245 { 285 - name: "different artists not merged", 246 + name: "zero tolerance exact duplicate", 286 247 lastfm: []PlayRecord{ 287 - { 288 - TrackName: "Song", 289 - Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, 290 - PlayedTime: Timestamp{Time: baseTime}, 291 - MusicServiceBaseDomain: MusicServiceLastFM, 292 - }, 248 + {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 293 249 }, 294 250 spotify: []PlayRecord{ 295 - { 296 - TrackName: "Song", 297 - Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, 298 - PlayedTime: Timestamp{Time: baseTime}, 299 - MusicServiceBaseDomain: MusicServiceSpotify, 300 - }, 251 + {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceSpotify}, 301 252 }, 302 - tolerance: DefaultCrossSourceTolerance, 303 - expectedLen: 
2, 304 - expectedDuplicates: 0, 253 + tolerance: 0, 254 + expectedLen: 1, 255 + expectedMergedTotal: 1, 256 + expectedFirstTrack: "Same Song", 305 257 }, 306 258 { 307 - name: "zero tolerance same timestamp merged", 259 + name: "within tolerance duplicate", 308 260 lastfm: []PlayRecord{ 309 - { 310 - TrackName: "Song", 311 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 312 - PlayedTime: Timestamp{Time: baseTime}, 313 - MusicServiceBaseDomain: MusicServiceLastFM, 314 - }, 261 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 315 262 }, 316 263 spotify: []PlayRecord{ 317 - { 318 - TrackName: "Song", 319 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 320 - PlayedTime: Timestamp{Time: baseTime}, 321 - MusicServiceBaseDomain: MusicServiceSpotify, 322 - }, 264 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 323 265 }, 324 - tolerance: 0, 325 - expectedLen: 1, 326 - expectedDuplicates: 1, 266 + tolerance: 30 * time.Second, 267 + expectedLen: 1, 268 + expectedMergedTotal: 1, 269 + expectedFirstTrack: "Song", 327 270 }, 328 271 { 329 - name: "partial overlap merged", 272 + name: "outside tolerance no duplicate", 330 273 lastfm: []PlayRecord{ 331 - { 332 - TrackName: "Song A", 333 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 334 - PlayedTime: Timestamp{Time: baseTime}, 335 - MusicServiceBaseDomain: MusicServiceLastFM, 336 - }, 337 - { 338 - TrackName: "Song B", 339 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 340 - PlayedTime: Timestamp{Time: baseTime.Add(1 * time.Hour)}, 341 - MusicServiceBaseDomain: MusicServiceLastFM, 342 - }, 274 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 343 275 }, 
344 276 spotify: []PlayRecord{ 345 - { 346 - TrackName: "Song A", 347 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 348 - PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, 349 - MusicServiceBaseDomain: MusicServiceSpotify, 350 - }, 351 - { 352 - TrackName: "Song C", 353 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 354 - PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Hour)}, 355 - MusicServiceBaseDomain: MusicServiceSpotify, 356 - }, 277 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(60 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 357 278 }, 358 - tolerance: DefaultCrossSourceTolerance, 359 - expectedLen: 3, 360 - expectedDuplicates: 1, 279 + tolerance: 30 * time.Second, 280 + expectedLen: 2, 281 + expectedMergedTotal: 2, 361 282 }, 362 283 { 363 - name: "case insensitive matching", 284 + name: "time bucket boundary exact", 364 285 lastfm: []PlayRecord{ 365 - { 366 - TrackName: "SONG", 367 - Artists: []PlayRecordArtist{{ArtistName: "ARTIST"}}, 368 - PlayedTime: Timestamp{Time: baseTime}, 369 - MusicServiceBaseDomain: MusicServiceLastFM, 370 - }, 286 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 371 287 }, 372 288 spotify: []PlayRecord{ 373 - { 374 - TrackName: "song", 375 - Artists: []PlayRecordArtist{{ArtistName: "artist"}}, 376 - PlayedTime: Timestamp{Time: baseTime}, 377 - MusicServiceBaseDomain: MusicServiceSpotify, 378 - }, 289 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(29 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 379 290 }, 380 - tolerance: DefaultCrossSourceTolerance, 381 - expectedLen: 1, 382 - expectedDuplicates: 1, 291 + tolerance: 30 * time.Second, 292 + expectedLen: 1, 293 + expectedMergedTotal: 1, 383 294 }, 384 295 { 385 - name: 
"punctuation insensitive matching", 296 + name: "time bucket boundary crossed", 386 297 lastfm: []PlayRecord{ 387 - { 388 - TrackName: "Don't Stop!", 389 - Artists: []PlayRecordArtist{{ArtistName: "Queen"}}, 390 - PlayedTime: Timestamp{Time: baseTime}, 391 - MusicServiceBaseDomain: MusicServiceLastFM, 392 - }, 298 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 393 299 }, 394 300 spotify: []PlayRecord{ 395 - { 396 - TrackName: "Dont Stop", 397 - Artists: []PlayRecordArtist{{ArtistName: "Queen"}}, 398 - PlayedTime: Timestamp{Time: baseTime}, 399 - MusicServiceBaseDomain: MusicServiceSpotify, 400 - }, 301 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(31 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 401 302 }, 402 - tolerance: DefaultCrossSourceTolerance, 403 - expectedLen: 1, 404 - expectedDuplicates: 1, 303 + tolerance: 30 * time.Second, 304 + expectedLen: 2, 405 305 }, 406 306 { 407 - name: "many records no duplicates", 307 + name: "lastfm priority over spotify", 408 308 lastfm: []PlayRecord{ 409 - {TrackName: "Song 1", Artists: []PlayRecordArtist{{ArtistName: "Artist 1"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 410 - {TrackName: "Song 2", Artists: []PlayRecordArtist{{ArtistName: "Artist 2"}}, PlayedTime: Timestamp{Time: baseTime.Add(1 * time.Minute)}, MusicServiceBaseDomain: MusicServiceLastFM}, 411 - {TrackName: "Song 3", Artists: []PlayRecordArtist{{ArtistName: "Artist 3"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Minute)}, MusicServiceBaseDomain: MusicServiceLastFM}, 309 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 412 310 }, 413 311 spotify: []PlayRecord{ 414 - {TrackName: "Song 4", Artists: 
[]PlayRecordArtist{{ArtistName: "Artist 4"}}, PlayedTime: Timestamp{Time: baseTime.Add(3 * time.Minute)}, MusicServiceBaseDomain: MusicServiceSpotify}, 415 - {TrackName: "Song 5", Artists: []PlayRecordArtist{{ArtistName: "Artist 5"}}, PlayedTime: Timestamp{Time: baseTime.Add(4 * time.Minute)}, MusicServiceBaseDomain: MusicServiceSpotify}, 416 - {TrackName: "Song 6", Artists: []PlayRecordArtist{{ArtistName: "Artist 6"}}, PlayedTime: Timestamp{Time: baseTime.Add(5 * time.Minute)}, MusicServiceBaseDomain: MusicServiceSpotify}, 312 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 417 313 }, 418 - tolerance: DefaultCrossSourceTolerance, 419 - expectedLen: 6, 420 - expectedDuplicates: 0, 421 - expectedLastFMUnique: 3, 422 - expectedSpotifyUnique: 3, 314 + tolerance: 30 * time.Second, 315 + expectedLen: 1, 316 + expectedMergedTotal: 1, 317 + expectedFirstTrack: "Song", 423 318 }, 424 319 { 425 - name: "zero tolerance 1 second apart not merged", 320 + name: "same source with mbid preferred", 426 321 lastfm: []PlayRecord{ 427 - { 428 - TrackName: "Song", 429 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 430 - PlayedTime: Timestamp{Time: baseTime}, 431 - MusicServiceBaseDomain: MusicServiceLastFM, 432 - }, 433 - }, 434 - spotify: []PlayRecord{ 435 - { 436 - TrackName: "Song", 437 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 438 - PlayedTime: Timestamp{Time: baseTime.Add(1 * time.Second)}, 439 - MusicServiceBaseDomain: MusicServiceSpotify, 440 - }, 322 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM, RecordingMbId: "mbid-123"}, 323 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceLastFM}, 441 324 }, 
442 - tolerance: 0, 443 - expectedLen: 2, 444 - expectedDuplicates: 0, 325 + spotify: []PlayRecord{}, 326 + tolerance: 30 * time.Second, 327 + expectedLen: 1, 328 + expectedMergedTotal: 1, 329 + expectedFirstTrack: "Song", 445 330 }, 446 331 { 447 - name: "five minute tolerance 31 seconds apart merged", 332 + name: "case insensitive duplicate detection", 448 333 lastfm: []PlayRecord{ 449 - { 450 - TrackName: "Song", 451 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 452 - PlayedTime: Timestamp{Time: baseTime}, 453 - MusicServiceBaseDomain: MusicServiceLastFM, 454 - }, 334 + {TrackName: "song title", Artists: []PlayRecordArtist{{ArtistName: "artist name"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 455 335 }, 456 336 spotify: []PlayRecord{ 457 - { 458 - TrackName: "Song", 459 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 460 - PlayedTime: Timestamp{Time: baseTime.Add(31 * time.Second)}, 461 - MusicServiceBaseDomain: MusicServiceSpotify, 462 - }, 337 + {TrackName: "SONG TITLE", Artists: []PlayRecordArtist{{ArtistName: "ARTIST NAME"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 463 338 }, 464 - tolerance: DefaultCrossSourceTolerance, 465 - expectedLen: 1, 466 - expectedDuplicates: 1, 339 + tolerance: 30 * time.Second, 340 + expectedLen: 1, 341 + expectedMergedTotal: 1, 467 342 }, 468 343 { 469 - name: "one minute tolerance 30 seconds apart merged", 344 + name: "multiple duplicates across time buckets", 470 345 lastfm: []PlayRecord{ 471 - { 472 - TrackName: "Song", 473 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 474 - PlayedTime: Timestamp{Time: baseTime}, 475 - MusicServiceBaseDomain: MusicServiceLastFM, 476 - }, 346 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 347 + {TrackName: "Song", Artists: 
[]PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Minute)}, MusicServiceBaseDomain: MusicServiceLastFM}, 477 348 }, 478 349 spotify: []PlayRecord{ 479 - { 480 - TrackName: "Song", 481 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 482 - PlayedTime: Timestamp{Time: baseTime.Add(30 * time.Second)}, 483 - MusicServiceBaseDomain: MusicServiceSpotify, 484 - }, 350 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 351 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(2*time.Minute + 10*time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 485 352 }, 486 - tolerance: time.Minute, 487 - expectedLen: 1, 488 - expectedDuplicates: 1, 353 + tolerance: 30 * time.Second, 354 + expectedLen: 2, 355 + expectedMergedTotal: 2, 489 356 }, 490 357 { 491 - name: "30 second tolerance 31 seconds apart not merged", 358 + name: "sorted by time then track name", 492 359 lastfm: []PlayRecord{ 493 - { 494 - TrackName: "Song", 495 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 496 - PlayedTime: Timestamp{Time: baseTime}, 497 - MusicServiceBaseDomain: MusicServiceLastFM, 498 - }, 360 + {TrackName: "A Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 361 + {TrackName: "B Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Hour)}, MusicServiceBaseDomain: MusicServiceLastFM}, 499 362 }, 500 363 spotify: []PlayRecord{ 501 - { 502 - TrackName: "Song", 503 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 504 - PlayedTime: Timestamp{Time: baseTime.Add(31 * time.Second)}, 505 - MusicServiceBaseDomain: MusicServiceSpotify, 506 - }, 364 + {TrackName: "A Song", Artists: 
[]PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(30 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 507 365 }, 508 - tolerance: 30 * time.Second, 509 - expectedLen: 2, 510 - expectedDuplicates: 0, 366 + tolerance: 30 * time.Second, 367 + expectedLen: 2, 368 + expectedMergedTotal: 2, 369 + expectedOrder: []string{"A Song", "B Song"}, 511 370 }, 512 371 { 513 - name: "many consecutive deduplications", 372 + name: "many duplicates in same bucket", 514 373 lastfm: []PlayRecord{ 515 - {TrackName: "Written In Stone - KAYTRANADA Remix", Artists: []PlayRecordArtist{{ArtistName: "Robert Glasper"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 0, 55, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 516 - {TrackName: "Gum In My Mouth", Artists: []PlayRecordArtist{{ArtistName: "Butcher Brown", ArtistMbId: "c0937ba4-6869-456b-afd0-10335ae50245"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 3, 58, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "ab00c566-3038-4e5f-a5e4-264a9baf542c", RecordingMbId: "5d913a35-fbee-403d-8771-4a7e11013889"}, 517 - {TrackName: "Welcome to the World of the Plastic Beach", Artists: []PlayRecordArtist{{ArtistName: "Gorillaz", ArtistMbId: "e21857d5-3256-4547-afb3-4b6ded592596"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 6, 39, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "28ddf022-0a8a-4ecd-bf18-d80af26c3aff", RecordingMbId: "4d3de31d-d25f-3abf-9bd1-8e38b62dd37e"}, 518 - {TrackName: "Already There", Artists: []PlayRecordArtist{{ArtistName: "Taylor McFerrin", ArtistMbId: "7abc2c7b-f47f-4d94-b75f-8cb4ca926899"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 10, 14, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "2113fbb8-c2b3-4aaf-9b33-92845940f82d"}, 519 - {TrackName: "Here We Go Again", Artists: []PlayRecordArtist{{ArtistName: "Buckshot LeFonque", ArtistMbId: 
"c1085917-1048-4f49-91d8-f7f7625e3545"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 13, 15, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "02437aee-0d9a-3134-806e-c27d799203d2", RecordingMbId: "0d5ee57b-21cb-329d-872c-43569e13c151"}, 520 - {TrackName: "Life's Work", Artists: []PlayRecordArtist{{ArtistName: "LooPRaT"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 15, 41, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "6221803a-4228-42e2-9191-30723b1faa0e", RecordingMbId: "63d324cd-a34b-455b-a8d6-1572766854b9"}, 521 - {TrackName: "Chaser", Artists: []PlayRecordArtist{{ArtistName: "Electric Wire Hustle", ArtistMbId: "77fc277e-f79d-40b1-b5c8-92702c86b760"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 19, 13, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "1e42946f-5a6b-3177-9fab-73a8b3377486", RecordingMbId: "3d356387-45fc-4ffa-8a34-76aae01f6de7"}, 522 - {TrackName: "Burn & Rise", Artists: []PlayRecordArtist{{ArtistName: "Yazmin Lacey", ArtistMbId: "451919df-764c-40cf-9aa2-fcafe599d869"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 23, 58, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "bdba3763-8f61-4598-8a20-e78424dc744b", RecordingMbId: "4d7b93fc-3651-4396-88b9-7bc98eb35e09"}, 523 - {TrackName: "I Want You", Artists: []PlayRecordArtist{{ArtistName: "Robert Glasper", ArtistMbId: "6e8f82ea-9e6d-4fdd-9b32-32feef13186b"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 26, 38, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "092c640b-c864-4e4b-abab-e44f1c0fe681", RecordingMbId: "369d7f97-1a10-4b8f-b867-05f5b64b5edf"}, 524 - {TrackName: "Go On", Artists: []PlayRecordArtist{{ArtistName: "Snoop Dogg", ArtistMbId: "f90e8b26-9e52-4669-a5c9-e28529c47894"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 21, 8, 2, 43, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: 
"e228272d-9b8c-4993-b2ee-ae9a0dbfe816"}, 525 - }, 526 - spotify: []PlayRecord{ 527 - {TrackName: "Lil Girl feat. Fatima", Artists: []PlayRecordArtist{{ArtistName: "Shafiq Husayn"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 0, 55, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 230200}, 528 - {TrackName: "Written In Stone - KAYTRANADA Remix", Artists: []PlayRecordArtist{{ArtistName: "Robert Glasper"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 3, 58, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 182452}, 529 - {TrackName: "Gum In My Mouth", Artists: []PlayRecordArtist{{ArtistName: "Butcher Brown"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 6, 39, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 159853}, 530 - {TrackName: "Welcome to the World of the Plastic Beach (feat. Snoop Dogg and Hypnotic Brass Ensemble)", Artists: []PlayRecordArtist{{ArtistName: "Gorillaz"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 10, 14, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 215506}, 531 - {TrackName: "Already There", Artists: []PlayRecordArtist{{ArtistName: "Taylor McFerrin"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 13, 15, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 179989}, 532 - {TrackName: "Here We Go Again", Artists: []PlayRecordArtist{{ArtistName: "Buckshot LeFonque"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 15, 41, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 146600}, 533 - {TrackName: "Life's Work", Artists: []PlayRecordArtist{{ArtistName: "LOOPRAT"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 19, 13, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 210280}, 534 - {TrackName: "Chaser", Artists: []PlayRecordArtist{{ArtistName: "Electric Wire Hustle"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 23, 58, 0, 
time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 285493}, 535 - {TrackName: "Burn & Rise", Artists: []PlayRecordArtist{{ArtistName: "Yazmin Lacey"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 26, 38, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 158685}, 536 - {TrackName: "I Want You", Artists: []PlayRecordArtist{{ArtistName: "Robert Glasper"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 29, 2, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 141024}, 537 - }, 538 - tolerance: DefaultCrossSourceTolerance, 539 - expectedLen: 12, 540 - expectedDuplicates: 8, 541 - }, 542 - { 543 - name: "mixed sources and edge cases", 544 - lastfm: []PlayRecord{ 545 - {TrackName: "Roi du nord (Freestyle)", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi", ArtistMbId: "9b205338-4565-4b14-8e4b-94d1abfedfbc"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 37, 30, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "abc"}, 546 - {TrackName: "Rap conscient", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi", ArtistMbId: "9b205338-4565-4b14-8e4b-94d1abfedfbc"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 39, 39, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 547 - {TrackName: "Saint jack", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi", ArtistMbId: "9b205338-4565-4b14-8e4b-94d1abfedfbc"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 41, 57, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 548 - {TrackName: "(I'm a Kadaver) Alakazam", Artists: []PlayRecordArtist{{ArtistName: "Psychedelic Porn Crumpets", ArtistMbId: "11d94660-1963-4020-8762-4c5907e2ea48"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 3, 57, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, RecordingMbId: "xyz"}, 549 - {TrackName: "Teddy Picker", Artists: []PlayRecordArtist{{ArtistName: "Arctic Monkeys", ArtistMbId: 
"ada7a83c-e3e1-40f1-93f9-3e73dbc9298a"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 7, 43, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "def"}, 550 - {TrackName: "New Gold (feat. Tame Impala and Bootie Brown)", Artists: []PlayRecordArtist{{ArtistName: "Gorillaz", ArtistMbId: "e21857d5-3256-4547-afb3-4b6ded592596"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 10, 24, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 551 - {TrackName: "The Pretender", Artists: []PlayRecordArtist{{ArtistName: "Foo Fighters", ArtistMbId: "67f66c07-6e61-4026-ade5-7e782fad3a5d"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 13, 57, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 552 - {TrackName: "Make It Wit Chu", Artists: []PlayRecordArtist{{ArtistName: "Queens of the Stone Age", ArtistMbId: "7dc8f5bd-9d0b-4087-9f73-dc164950bbd8"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 18, 23, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 553 - {TrackName: "Broken Boy", Artists: []PlayRecordArtist{{ArtistName: "Cage the Elephant", ArtistMbId: "b41b38d4-ef3e-4f37-8c75-cfe9af999696"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 23, 11, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 554 - {TrackName: "Take Me Out", Artists: []PlayRecordArtist{{ArtistName: "Franz Ferdinand", ArtistMbId: "aa7a2827-f74b-473c-bd79-03d065835cf7"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 25, 52, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 555 - }, 556 - spotify: []PlayRecord{ 557 - {TrackName: "Roi du nord (Freestyle)", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 39, 37, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 180000}, 558 - {TrackName: "Rap conscient", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 41, 59, 
0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 200000}, 559 - {TrackName: "Saint jack", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 43, 30, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 190000}, 560 - {TrackName: "Ford (Freestyle)", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 44, 32, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 170000}, 561 - {TrackName: "(I'm a Kadaver) Alakazam", Artists: []PlayRecordArtist{{ArtistName: "Psychedelic Porn Crumpets"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 7, 44, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 228954}, 562 - {TrackName: "Teddy Picker", Artists: []PlayRecordArtist{{ArtistName: "Arctic Monkeys"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 10, 27, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 165000}, 563 - {TrackName: "New Gold (feat. 
Tame Impala and Bootie Brown)", Artists: []PlayRecordArtist{{ArtistName: "Gorillaz"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 13, 58, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 215149}, 564 - {TrackName: "The Pretender", Artists: []PlayRecordArtist{{ArtistName: "Foo Fighters"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 18, 25, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 269373}, 565 - {TrackName: "Make It Wit Chu", Artists: []PlayRecordArtist{{ArtistName: "Queens of the Stone Age"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 23, 13, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 290493}, 566 - {TrackName: "Broken Boy", Artists: []PlayRecordArtist{{ArtistName: "Cage The Elephant"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 25, 52, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 163200}, 567 - }, 568 - tolerance: DefaultCrossSourceTolerance, 569 - expectedLen: 11, 570 - expectedDuplicates: 9, 571 - }, 572 - { 573 - name: "same song different days not merged", 574 - lastfm: []PlayRecord{ 575 - {TrackName: "After School", Artists: []PlayRecordArtist{{ArtistName: "Weeekly"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 18, 17, 59, 3, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 576 - {TrackName: "Blue Flame", Artists: []PlayRecordArtist{{ArtistName: "LE SSERAFIM"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 18, 17, 59, 4, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 577 - {TrackName: "After LIKE", Artists: []PlayRecordArtist{{ArtistName: "IVE"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 18, 18, 2, 25, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 578 - }, 579 - spotify: []PlayRecord{ 580 - {TrackName: "After School", Artists: []PlayRecordArtist{{ArtistName: "Weeekly"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 18, 18, 2, 26, 0, time.UTC)}, 
MusicServiceBaseDomain: MusicServiceSpotify}, 581 - {TrackName: "After LIKE", Artists: []PlayRecordArtist{{ArtistName: "IVE"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 18, 18, 5, 21, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify}, 582 - }, 583 - tolerance: DefaultCrossSourceTolerance, 584 - expectedLen: 3, 585 - expectedDuplicates: 2, 586 - expectedLastFMUnique: 3, 587 - expectedSpotifyUnique: 0, 588 - }, 589 - { 590 - name: "hyphen vs space dedupe with space-less normalization", 591 - lastfm: []PlayRecord{ 592 - {TrackName: "So This is Love?", Artists: []PlayRecordArtist{{ArtistName: "George Benson"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 14, 32, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 593 - {TrackName: "Cream Puff War", Artists: []PlayRecordArtist{{ArtistName: "Grateful Dead"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 18, 12, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 594 - {TrackName: "V Neck Sweater", Artists: []PlayRecordArtist{{ArtistName: "The Greyboy Allstars"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 20, 43, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 595 - {TrackName: "Jungle Boogie-in", Artists: []PlayRecordArtist{{ArtistName: "Ghost-Note"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 22, 53, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 596 - }, 597 - spotify: []PlayRecord{ 598 - {TrackName: "V-Neck Sweater", Artists: []PlayRecordArtist{{ArtistName: "The Greyboy Allstars"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 20, 43, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify}, 599 - {TrackName: "Jungle Boogie-in", Artists: []PlayRecordArtist{{ArtistName: "Ghost-Note"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 22, 53, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify}, 600 - }, 601 - tolerance: DefaultCrossSourceTolerance, 602 - expectedLen: 4, 603 - 
expectedDuplicates: 2, 604 - expectedLastFMUnique: 4, 605 - expectedSpotifyUnique: 0, 606 - }, 607 - { 608 - name: "korean artist name and diacritics", 609 - lastfm: []PlayRecord{ 610 - {TrackName: "DAAAAAMMMN", Artists: []PlayRecordArtist{{ArtistName: "김재중"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 16, 51, 1, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 611 - {TrackName: "Déjà fait", Artists: []PlayRecordArtist{{ArtistName: "Peet"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 17, 0, 10, 43, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 374 + {TrackName: "Popular Song", Artists: []PlayRecordArtist{{ArtistName: "Popular Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 375 + {TrackName: "Popular Song", Artists: []PlayRecordArtist{{ArtistName: "Popular Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(5 * time.Second)}, MusicServiceBaseDomain: MusicServiceLastFM}, 376 + {TrackName: "Popular Song", Artists: []PlayRecordArtist{{ArtistName: "Popular Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceLastFM}, 612 377 }, 613 378 spotify: []PlayRecord{ 614 - {TrackName: "DAAAAAMMMN", Artists: []PlayRecordArtist{{ArtistName: "김재중"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 16, 51, 1, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify}, 615 - {TrackName: "Déjà fait", Artists: []PlayRecordArtist{{ArtistName: "Peet"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 17, 0, 7, 21, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify}, 379 + {TrackName: "Popular Song", Artists: []PlayRecordArtist{{ArtistName: "Popular Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(15 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 380 + {TrackName: "Popular Song", Artists: []PlayRecordArtist{{ArtistName: "Popular Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(20 * time.Second)}, 
MusicServiceBaseDomain: MusicServiceSpotify}, 616 381 }, 617 - tolerance: DefaultCrossSourceTolerance, 618 - expectedLen: 2, 619 - expectedDuplicates: 2, 620 - expectedLastFMUnique: 2, 621 - expectedSpotifyUnique: 0, 382 + tolerance: 30 * time.Second, 383 + expectedLen: 1, 384 + expectedMergedTotal: 1, 622 385 }, 623 - } 624 - 625 - for _, tt := range tests { 626 - t.Run(tt.name, func(t *testing.T) { 627 - merged, stats := MergeRecords(tt.lastfm, tt.spotify, tt.tolerance) 628 - 629 - if len(merged) != tt.expectedLen { 630 - t.Errorf("len(merged) = %d, want %d", len(merged), tt.expectedLen) 631 - } 632 - if stats.DuplicatesRemoved != tt.expectedDuplicates { 633 - t.Errorf("stats.DuplicatesRemoved = %d, want %d", stats.DuplicatesRemoved, tt.expectedDuplicates) 634 - } 635 - if tt.expectedLastFMUnique > 0 && stats.LastFMUnique != tt.expectedLastFMUnique { 636 - t.Errorf("stats.LastFMUnique = %d, want %d", stats.LastFMUnique, tt.expectedLastFMUnique) 637 - } 638 - if tt.expectedSpotifyUnique > 0 && stats.SpotifyUnique != tt.expectedSpotifyUnique { 639 - t.Errorf("stats.SpotifyUnique = %d, want %d", stats.SpotifyUnique, tt.expectedSpotifyUnique) 640 - } 641 - if stats.LastFMTotal != len(tt.lastfm) { 642 - t.Errorf("stats.LastFMTotal = %d, want %d", stats.LastFMTotal, len(tt.lastfm)) 643 - } 644 - if stats.SpotifyTotal != len(tt.spotify) { 645 - t.Errorf("stats.SpotifyTotal = %d, want %d", stats.SpotifyTotal, len(tt.spotify)) 646 - } 647 - }) 648 - } 649 - } 650 - 651 - func TestMergeRecordsSortedByTime(t *testing.T) { 652 - baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 653 - 654 - tests := []struct { 655 - name string 656 - lastfm []PlayRecord 657 - spotify []PlayRecord 658 - tolerance time.Duration 659 - expectedOrder []string 660 - }{ 661 386 { 662 - name: "unsorted input sorted by time", 663 - lastfm: []PlayRecord{ 664 - {TrackName: "Later", Artists: []PlayRecordArtist{{ArtistName: "A"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Hour)}, 
MusicServiceBaseDomain: MusicServiceLastFM}, 665 - {TrackName: "First", Artists: []PlayRecordArtist{{ArtistName: "A"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 666 - }, 667 - spotify: []PlayRecord{ 668 - {TrackName: "Middle", Artists: []PlayRecordArtist{{ArtistName: "A"}}, PlayedTime: Timestamp{Time: baseTime.Add(1 * time.Hour)}, MusicServiceBaseDomain: MusicServiceSpotify}, 669 - }, 670 - tolerance: DefaultCrossSourceTolerance, 671 - expectedOrder: []string{"First", "Middle", "Later"}, 672 - }, 673 - { 674 - name: "same timestamp sorted by track name", 675 - lastfm: []PlayRecord{ 676 - {TrackName: "B Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 677 - {TrackName: "A Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceSpotify}, 678 - }, 679 - spotify: []PlayRecord{}, 680 - tolerance: DefaultCrossSourceTolerance, 681 - expectedOrder: []string{"A Song", "B Song"}, 682 - }, 683 - { 684 - name: "many records out of order", 387 + name: "adjacent bucket detection works", 685 388 lastfm: []PlayRecord{ 686 - {TrackName: "Song 5", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(40 * time.Minute)}, MusicServiceBaseDomain: MusicServiceLastFM}, 687 - {TrackName: "Song 1", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 688 - {TrackName: "Song 3", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(20 * time.Minute)}, MusicServiceBaseDomain: MusicServiceLastFM}, 389 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(29 * time.Second)}, MusicServiceBaseDomain: MusicServiceLastFM}, 689 390 }, 690 391 spotify: 
[]PlayRecord{ 691 - {TrackName: "Song 2", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Minute)}, MusicServiceBaseDomain: MusicServiceSpotify}, 692 - {TrackName: "Song 4", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(30 * time.Minute)}, MusicServiceBaseDomain: MusicServiceSpotify}, 392 + {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(31 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 693 393 }, 694 - tolerance: DefaultCrossSourceTolerance, 695 - expectedOrder: []string{"Song 1", "Song 2", "Song 3", "Song 4", "Song 5"}, 394 + tolerance: 5 * time.Second, 395 + expectedLen: 1, 396 + expectedMergedTotal: 1, 696 397 }, 697 398 } 698 399 699 400 for _, tt := range tests { 700 401 t.Run(tt.name, func(t *testing.T) { 701 - merged, _ := MergeRecords(tt.lastfm, tt.spotify, tt.tolerance) 402 + result := kway.Merge([][]PlayRecord{tt.lastfm, tt.spotify}, tt.tolerance) 702 403 703 - if len(merged) != len(tt.expectedOrder) { 704 - t.Fatalf("len(merged) = %d, want %d", len(merged), len(tt.expectedOrder)) 404 + if len(result) != tt.expectedLen { 405 + t.Errorf("MergeRecords() length = %d, want %d", len(result), tt.expectedLen) 705 406 } 706 407 707 - for i, expected := range tt.expectedOrder { 708 - if merged[i].TrackName != expected { 709 - t.Errorf("merged[%d].TrackName = %q, want %q", i, merged[i].TrackName, expected) 408 + if tt.expectedFirstTrack != "" && len(result) > 0 { 409 + if result[0].TrackName != tt.expectedFirstTrack { 410 + t.Errorf("MergeRecords() first track = %q, want %q", result[0].TrackName, tt.expectedFirstTrack) 710 411 } 711 412 } 712 - }) 713 - } 714 - } 715 413 716 - func TestMergeRecordsLastFMPriority(t *testing.T) { 717 - baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 718 - 719 - tests := []struct { 720 - name string 721 - lastfm PlayRecord 722 - spotify 
PlayRecord 723 - tolerance time.Duration 724 - expectedService string 725 - }{ 726 - { 727 - name: "lastfm wins same timestamp", 728 - lastfm: PlayRecord{ 729 - TrackName: "Song", 730 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 731 - PlayedTime: Timestamp{Time: baseTime}, 732 - MusicServiceBaseDomain: MusicServiceLastFM, 733 - }, 734 - spotify: PlayRecord{ 735 - TrackName: "Song", 736 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 737 - PlayedTime: Timestamp{Time: baseTime}, 738 - MusicServiceBaseDomain: MusicServiceSpotify, 739 - }, 740 - tolerance: DefaultCrossSourceTolerance, 741 - expectedService: MusicServiceLastFM, 742 - }, 743 - { 744 - name: "lastfm wins within tolerance", 745 - lastfm: PlayRecord{ 746 - TrackName: "Song", 747 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 748 - PlayedTime: Timestamp{Time: baseTime}, 749 - MusicServiceBaseDomain: MusicServiceLastFM, 750 - }, 751 - spotify: PlayRecord{ 752 - TrackName: "Song", 753 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 754 - PlayedTime: Timestamp{Time: baseTime.Add(29 * time.Second)}, 755 - MusicServiceBaseDomain: MusicServiceSpotify, 756 - }, 757 - tolerance: DefaultCrossSourceTolerance, 758 - expectedService: MusicServiceLastFM, 759 - }, 760 - { 761 - name: "lastfm wins even when later", 762 - lastfm: PlayRecord{ 763 - TrackName: "Song", 764 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 765 - PlayedTime: Timestamp{Time: baseTime.Add(31 * time.Second)}, 766 - MusicServiceBaseDomain: MusicServiceLastFM, 767 - }, 768 - spotify: PlayRecord{ 769 - TrackName: "Song", 770 - Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 771 - PlayedTime: Timestamp{Time: baseTime}, 772 - MusicServiceBaseDomain: MusicServiceSpotify, 773 - }, 774 - tolerance: DefaultCrossSourceTolerance, 775 - expectedService: MusicServiceLastFM, 776 - }, 777 - } 778 - 779 - for _, tt := range tests { 780 - t.Run(tt.name, func(t *testing.T) { 781 - merged, _ := 
MergeRecords([]PlayRecord{tt.lastfm}, []PlayRecord{tt.spotify}, tt.tolerance) 782 - 783 - if len(merged) != 1 { 784 - t.Fatalf("len(merged) = %d, want 1", len(merged)) 414 + if len(tt.expectedOrder) > 0 { 415 + if len(result) != len(tt.expectedOrder) { 416 + t.Errorf("MergeRecords() order length mismatch, got %d, want %d", len(result), len(tt.expectedOrder)) 417 + } else { 418 + for i, expectedTrack := range tt.expectedOrder { 419 + if i < len(result) && result[i].TrackName != expectedTrack { 420 + t.Errorf("MergeRecords() order[%d] = %q, want %q", i, result[i].TrackName, expectedTrack) 421 + } 422 + } 423 + } 785 424 } 786 - if merged[0].MusicServiceBaseDomain != tt.expectedService { 787 - t.Errorf("merged[0].MusicServiceBaseDomain = %q, want %q", merged[0].MusicServiceBaseDomain, tt.expectedService) 788 - } 789 - }) 790 - } 791 - } 792 425 793 - func TestMergeRecordsStats(t *testing.T) { 794 - baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 795 - 796 - tests := []struct { 797 - name string 798 - lastfm []PlayRecord 799 - spotify []PlayRecord 800 - tolerance time.Duration 801 - expectedLastFMTotal int 802 - expectedSpotifyTotal int 803 - expectedMergedTotal int 804 - }{ 805 - { 806 - name: "all unique", 807 - lastfm: []PlayRecord{{TrackName: "A", Artists: []PlayRecordArtist{{ArtistName: "X"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}}, 808 - spotify: []PlayRecord{{TrackName: "B", Artists: []PlayRecordArtist{{ArtistName: "Y"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Hour)}, MusicServiceBaseDomain: MusicServiceSpotify}}, 809 - tolerance: DefaultCrossSourceTolerance, 810 - expectedLastFMTotal: 1, 811 - expectedSpotifyTotal: 1, 812 - expectedMergedTotal: 2, 813 - }, 814 - { 815 - name: "all duplicates", 816 - lastfm: []PlayRecord{{TrackName: "A", Artists: []PlayRecordArtist{{ArtistName: "X"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}}, 817 - spotify: 
[]PlayRecord{{TrackName: "A", Artists: []PlayRecordArtist{{ArtistName: "X"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceSpotify}}, 818 - tolerance: DefaultCrossSourceTolerance, 819 - expectedLastFMTotal: 1, 820 - expectedSpotifyTotal: 1, 821 - expectedMergedTotal: 1, 822 - }, 823 - { 824 - name: "mixed", 825 - lastfm: []PlayRecord{ 826 - {TrackName: "A", Artists: []PlayRecordArtist{{ArtistName: "X"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 827 - {TrackName: "B", Artists: []PlayRecordArtist{{ArtistName: "Y"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Hour)}, MusicServiceBaseDomain: MusicServiceLastFM}, 828 - }, 829 - spotify: []PlayRecord{ 830 - {TrackName: "A", Artists: []PlayRecordArtist{{ArtistName: "X"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceSpotify}, 831 - {TrackName: "C", Artists: []PlayRecordArtist{{ArtistName: "Z"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Hour)}, MusicServiceBaseDomain: MusicServiceSpotify}, 832 - }, 833 - tolerance: DefaultCrossSourceTolerance, 834 - expectedLastFMTotal: 2, 835 - expectedSpotifyTotal: 2, 836 - expectedMergedTotal: 3, 837 - }, 838 - } 839 - 840 - for _, tt := range tests { 841 - t.Run(tt.name, func(t *testing.T) { 842 - _, stats := MergeRecords(tt.lastfm, tt.spotify, tt.tolerance) 843 - 844 - if stats.LastFMTotal != tt.expectedLastFMTotal { 845 - t.Errorf("stats.LastFMTotal = %d, want %d", stats.LastFMTotal, tt.expectedLastFMTotal) 846 - } 847 - if stats.SpotifyTotal != tt.expectedSpotifyTotal { 848 - t.Errorf("stats.SpotifyTotal = %d, want %d", stats.SpotifyTotal, tt.expectedSpotifyTotal) 849 - } 850 - if stats.MergedTotal != tt.expectedMergedTotal { 851 - t.Errorf("stats.MergedTotal = %d, want %d", stats.MergedTotal, tt.expectedMergedTotal) 426 + // Verify sorting is correct 427 + for i := 1; i < len(result); i++ { 428 + prev, curr := result[i-1], result[i] 429 + if 
prev.PlayedTime.After(curr.PlayedTime.Time) { 430 + t.Errorf("MergeRecords() sorting failed: %q at %v should be after %q at %v", 431 + prev.TrackName, prev.PlayedTime.Time, curr.TrackName, curr.PlayedTime.Time) 432 + } 433 + if prev.PlayedTime.Equal(curr.PlayedTime.Time) && prev.TrackName > curr.TrackName { 434 + t.Errorf("MergeRecords() same-time sorting failed: %q should be before %q", 435 + prev.TrackName, curr.TrackName) 436 + } 852 437 } 853 438 }) 854 439 }