like malachite (atproto-lastfm-importer) but in go and bluer
go spotify tealfm lastfm atproto
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

refactor!: data storage now uses tid for storing records

karitham 26ea91c9 0f28bae4

+228 -285
+8 -14
main.go
··· 277 277 278 278 for _, fr := range failedRecords { 279 279 if dryRun { 280 - fmt.Printf("[DRY-RUN] Would retry: %s - %s\n", getArtistName(fr.rec), fr.rec.TrackName) 280 + fmt.Printf("[DRY-RUN] Would retry: %s - %s\n", fr.rec.ArtistName(), fr.rec.TrackName) 281 281 successCount++ 282 282 continue 283 283 } ··· 291 291 res := sync.PublishBatch(ctx, repoClient, did, []sync.PlayRecord{fr.rec}, w, g, a.storage) 292 292 293 293 if res.ErrorCount == 0 { 294 - fmt.Printf("Successfully retried: %s - %s\n", getArtistName(fr.rec), fr.rec.TrackName) 294 + fmt.Printf("Successfully retried: %s - %s\n", fr.rec.ArtistName(), fr.rec.TrackName) 295 295 // Mark as published (updates processedBucket to 1) 296 296 if err := a.storage.MarkPublished(did, fr.key); err != nil { 297 297 a.log.Error("Failed to mark record as published", logutil.Error(err), slog.String("key", fr.key)) ··· 302 302 } 303 303 successCount++ 304 304 } else { 305 - fmt.Printf("Failed again: %s - %s: %v\n", getArtistName(fr.rec), fr.rec.TrackName, res.LastError) 305 + fmt.Printf("Failed again: %s - %s: %v\n", fr.rec.ArtistName(), fr.rec.TrackName, res.LastError) 306 306 errorCount++ 307 307 limiter.RefundBulkWrite(1) 308 308 } ··· 361 361 for _, f := range failed { 362 362 fmt.Printf(" [%s] %s - %s: %s\n", 363 363 f.Record.PlayedTime.Format(time.RFC3339), 364 - getArtistName(f.Record), 364 + f.Record.ArtistName(), 365 365 f.Record.TrackName, 366 366 f.Error) 367 367 } ··· 579 579 580 580 if len(newRecords) > 0 { 581 581 newEntries := make(map[string][]byte) 582 - for _, rec := range newRecords { 583 - key := sync.CreateRecordKey(rec) 582 + keys := sync.CreateRecordKeys(newRecords) 583 + for i, rec := range newRecords { 584 + key := keys[i] 584 585 value, _ := json.Marshal(rec) 585 586 newEntries[key] = value 586 587 } ··· 737 738 a.log.InfoContext(ctx, "Would remove", 738 739 slog.String("uri", rec.URI), 739 740 slog.String("track", keep.Value.TrackName), 740 - slog.String("artist", getArtistName(keep.Value)), 741 + slog.String("artist", keep.Value.ArtistName()), 741 742 slog.String("time", keep.Value.PlayedTime.Format(time.RFC3339))) 742 743 } 743 744 } ··· 836 837 837 838 return nil 838 839 } 839 - 840 - func getArtistName(record sync.PlayRecord) string { 841 - if len(record.Artists) > 0 { 842 - return record.Artists[0].ArtistName 843 - } 844 - return "Unknown Artist" 845 - }
+12 -57
sync/adapter.go
··· 5 5 "encoding/json" 6 6 "fmt" 7 7 "log/slog" 8 + "strings" 8 9 9 10 "github.com/bluesky-social/indigo/atproto/atclient" 10 11 "github.com/bluesky-social/indigo/atproto/syntax" ··· 92 93 slog.Debug("failed to unmarshal record", slog.String("uri", r.URI), logutil.Error(err)) 93 94 continue 94 95 } 95 - slog.Debug("parsed record", slog.String("uri", r.URI), logutil.Track(playRecord.TrackName, getArtistName(playRecord), playRecord.PlayedTime.Time)) 96 + slog.Debug("parsed record", slog.String("uri", r.URI), logutil.Track(playRecord.TrackName, playRecord.ArtistName(), playRecord.PlayedTime.Time)) 96 97 } 97 98 out = append(out, RecordRef{ 98 99 URI: r.URI, ··· 168 169 c.limiter.RefundBulkWrite(1) 169 170 } 170 171 return err 171 - } 172 - 173 - type ExistingRecord struct { 174 - URI string 175 - CID string 176 - Value PlayRecord 177 172 } 178 173 179 174 func FetchExisting(ctx context.Context, client RepoClient, did string, storage cache.Storage, forceRefresh bool) ([]ExistingRecord, error) { ··· 235 230 cacheEntries := make(map[string][]byte) 236 231 keys := make([]string, 0, len(allRecords)) 237 232 for _, rec := range allRecords { 238 - key := CreateRecordKey(rec.Value) 233 + // use the rkey from URI if available, otherwise fallback to generating it 234 + // URI is at://did/collection/rkey 235 + parts := strings.Split(rec.URI, "/") 236 + key := parts[len(parts)-1] 237 + if key == "" { 238 + key = CreateRecordKey(rec.Value) 239 + } 239 240 value, _ := json.Marshal(rec.Value) 240 241 cacheEntries[key] = value 241 242 keys = append(keys, key) ··· 256 257 return allRecords, nil 257 258 } 258 259 259 - func FilterNew(records []PlayRecord, existing []ExistingRecord, processed map[string]bool) []PlayRecord { 260 - existingKeys := make(map[string]bool) 261 - for _, rec := range existing { 262 - key := CreateRecordKey(rec.Value) 263 - if key == "|||" { 264 - continue 265 - } 266 - existingKeys[key] = true 267 - } 268 - 269 - var newRecords []PlayRecord 270 - for _, record := range records { 271 - key := CreateRecordKey(record) 272 - if !existingKeys[key] && !processed[key] { 273 - newRecords = append(newRecords, record) 274 - } 275 - } 276 - return newRecords 277 - } 278 - 279 - func FindDuplicates(records []ExistingRecord) map[string][]ExistingRecord { 280 - groups := make(map[string][]ExistingRecord) 281 - for _, rec := range records { 282 - key := CreateRecordKey(rec.Value) 283 - if key == "|||" { 284 - continue 285 - } 286 - groups[key] = append(groups[key], rec) 287 - } 288 - 289 - duplicates := make(map[string][]ExistingRecord) 290 - for key, group := range groups { 291 - if len(group) >= 2 { 292 - duplicates[key] = group 293 - } 294 - } 295 - return duplicates 296 - } 297 - 298 260 func prepareWrites(records []PlayRecord, collection string) ([]map[string]any, error) { 299 261 if len(records) == 0 { 300 262 return nil, nil 301 263 } 302 264 303 265 writes := make([]map[string]any, len(records)) 304 - usedTIDs := make(map[syntax.TID]int) 266 + keys := CreateRecordKeys(records) 305 267 306 268 for i, rec := range records { 307 - tid := syntax.NewTIDFromTime(rec.PlayedTime.Time, 0) 308 - for usedTIDs[tid]++; usedTIDs[tid] > 1; { 309 - tid = syntax.NewTIDFromTime(rec.PlayedTime.Time, uint(usedTIDs[tid])) 310 - } 311 - usedTIDs[tid]++ 312 - 313 269 writes[i] = map[string]any{ 314 270 "$type": "com.atproto.repo.applyWrites#create", 315 271 "collection": collection, 316 - "rkey": string(tid), 272 + "rkey": keys[i], 317 273 "value": rec, 318 274 } 319 275 } ··· 322 278 } 323 279 324 280 func generateRecordURI(did string, record PlayRecord) string { 325 - tid := syntax.NewTIDFromTime(record.PlayedTime.Time, 0) 326 - return fmt.Sprintf("at://%s/%s/%s", did, RecordType, tid) 281 + return fmt.Sprintf("at://%s/%s/%s", did, RecordType, CreateRecordKey(record)) 327 282 } 328 283 329 284 func AddToCache(did string, records []ExistingRecord, storage cache.Storage) error {
+1 -1
sync/progress.go
··· 144 144 t.Errors += n 145 145 } 146 146 147 - func (t *ProgressTracker) Progress() (percent float64, eta time.Duration, elapsed time.Duration, rate string) { 147 + func (t *ProgressTracker) Progress() (percent float64, eta, elapsed time.Duration, rate string) { 148 148 t.mu.Lock() 149 149 defer t.mu.Unlock() 150 150
+9 -18
sync/publish.go
··· 79 79 for _, r := range batch { 80 80 tid := syntax.NewTIDFromTime(r.PlayedTime.Time, 0) 81 81 slog.Info("would publish record (dry run)", 82 - logutil.Track(r.TrackName, getArtistName(r), r.PlayedTime.Time), 82 + logutil.Track(r.TrackName, r.ArtistName(), r.PlayedTime.Time), 83 83 slog.String("rkey", string(tid))) 84 84 } 85 85 totalSuccess += len(batch) ··· 119 119 logutil.Error(lastResult.LastError), 120 120 slog.Int("count", len(batch)), 121 121 slog.Group("range", 122 - slog.Attr(logutil.Track(first.TrackName, getArtistName(first), first.PlayedTime.Time)), 123 - slog.Attr(logutil.Track(last.TrackName, getArtistName(last), last.PlayedTime.Time)))) 122 + slog.Attr(logutil.Track(first.TrackName, first.ArtistName(), first.PlayedTime.Time)), 123 + slog.Attr(logutil.Track(last.TrackName, last.ArtistName(), last.PlayedTime.Time)))) 124 124 } else { 125 125 slog.Error("batch failed after max retries", slog.Int("errorCount", lastResult.ErrorCount)) 126 126 } ··· 156 156 tracker.IncrementErrors(lastResult.ErrorCount) 157 157 158 158 if lastResult.SuccessCount > 0 && opts.Storage != nil { 159 - keys := makeRecordKeys(batch[:lastResult.SuccessCount]) 159 + keys := CreateRecordKeys(batch[:lastResult.SuccessCount]) 160 160 if err := opts.Storage.MarkPublished(did, keys...); err != nil { 161 161 slog.Error("failed to mark records as published", logutil.Error(err)) 162 162 } ··· 276 276 } 277 277 278 278 func makeRecordKeys(records []PlayRecord) []string { 279 - keys := make([]string, 0, len(records)) 280 - for _, record := range records { 281 - keys = append(keys, CreateRecordKey(record)) 282 - } 283 - return keys 279 + return CreateRecordKeys(records) 284 280 } 285 281 286 282 func logResult(success, errors int, startTime time.Time) { ··· 344 340 logBatch(atprotoRecords, consumedW, consumedG) 345 341 346 342 if storage != nil && did != "" { 347 - keys := makeRecordKeys(atprotoRecords) 343 + keys := CreateRecordKeys(atprotoRecords) 348 344 cacheEntries := make(map[string][]byte) 349 345 for i, rec := range atprotoRecords { 350 346 key := keys[i] ··· 380 376 slog.Int("global_consumed", consumedG), 381 377 slog.Int("global_limit", GlobalLimitDay), 382 378 slog.Int("global_remaining", GlobalLimitDay-consumedG), 383 - logutil.Track(first.TrackName, getArtistName(first), first.PlayedTime.Time), 384 - logutil.Track(last.TrackName, getArtistName(last), last.PlayedTime.Time)) 379 + logutil.Track(first.TrackName, first.ArtistName(), first.PlayedTime.Time), 380 + logutil.Track(last.TrackName, last.ArtistName(), last.PlayedTime.Time)) 385 381 } 386 382 387 - func getArtistName(record PlayRecord) string { 388 - if len(record.Artists) > 0 { 389 - return record.Artists[0].ArtistName 390 - } 391 - return "Unknown Artist" 392 - } 383 + // removed getArtistName from here to favor the one in record.go 393 384 394 385 func ratePerMinute(count int, duration time.Duration) float64 { 395 386 if duration == 0 {
+1 -1
sync/rate.go
··· 91 91 } 92 92 } 93 93 94 - func (l *quotaLimiter) wait(ctx context.Context, wKey, gKey string, wCost, gCost int, wLimit, gLimit int) error { 94 + func (l *quotaLimiter) wait(ctx context.Context, wKey, gKey string, wCost, gCost, wLimit, gLimit int) error { 95 95 l.mu.Lock() 96 96 defer l.mu.Unlock() 97 97
+159 -126
sync/record.go
··· 7 7 "time" 8 8 "unicode" 9 9 10 + "github.com/bluesky-social/indigo/atproto/syntax" 10 11 "golang.org/x/text/unicode/norm" 11 12 ) 12 13 13 - type Record struct { 14 - Record PlayRecord 15 - IsLastFM bool 16 - Time time.Time 14 + type ExistingRecord struct { 15 + URI string 16 + CID string 17 + Value PlayRecord 17 18 } 18 19 19 20 func normalizeString(s string) string { ··· 63 64 MsPlayed int `json:"msPlayed,omitempty"` 64 65 } 65 66 67 + func (r PlayRecord) ArtistName() string { 68 + if len(r.Artists) > 0 { 69 + return r.Artists[0].ArtistName 70 + } 71 + return "Unknown Artist" 72 + } 73 + 74 + func (r PlayRecord) NormalizedArtist() string { 75 + return normalizeString(r.ArtistName()) 76 + } 77 + 78 + func (r PlayRecord) NormalizedTrack() string { 79 + return normalizeString(r.TrackName) 80 + } 81 + 82 + func (r PlayRecord) HasMBID() bool { 83 + for _, a := range r.Artists { 84 + if a.ArtistMbId != "" { 85 + return true 86 + } 87 + } 88 + return r.RecordingMbId != "" 89 + } 90 + 91 + func (r PlayRecord) IsLastFM() bool { 92 + return r.MusicServiceBaseDomain == MusicServiceLastFM 93 + } 94 + 95 + func (r PlayRecord) BetterThan(other PlayRecord) bool { 96 + if r.IsLastFM() && !other.IsLastFM() { 97 + return true 98 + } 99 + if !r.IsLastFM() && other.IsLastFM() { 100 + return false 101 + } 102 + // Both same source, prefer the one with MBID 103 + if r.HasMBID() && !other.HasMBID() { 104 + return true 105 + } 106 + return false 107 + } 108 + 66 109 type PlayRecordArtist struct { 67 110 ArtistName string `json:"artistName"` 68 111 ArtistMbId string `json:"artistMbId,omitempty"` ··· 87 130 88 131 var ClientAgent = "lazuli/dev" 89 132 90 - type NormalizedRecord struct { 91 - original PlayRecord 92 - normalizedTrack string 93 - normalizedArtist string 94 - timestamp time.Time 95 - isLastFM bool 133 + func CreateRecordKey(record PlayRecord) string { 134 + return string(syntax.NewTIDFromTime(record.PlayedTime.Time, 0)) 96 135 } 97 136 98 - func hasMusicBrainzId(record PlayRecord) bool { 99 - for _, artist := range record.Artists { 100 - if artist.ArtistMbId != "" { 101 - return true 102 - } 103 - } 104 - return record.RecordingMbId != "" 105 - } 137 + func CreateRecordKeys(records []PlayRecord) []string { 138 + keys := make([]string, len(records)) 139 + usedTIDs := make(map[string]int) 106 140 107 - func selectBetterRecord(r1, r2 PlayRecord, r1IsLastFM, r2IsLastFM bool) PlayRecord { 108 - r1HasMbId := hasMusicBrainzId(r1) 109 - r2HasMbId := hasMusicBrainzId(r2) 110 - 111 - if r1IsLastFM && r2IsLastFM { 112 - if r1HasMbId && !r2HasMbId { 113 - return r1 114 - } 115 - if r2HasMbId && !r1HasMbId { 116 - return r2 141 + for i, rec := range records { 142 + t := rec.PlayedTime.Time 143 + tid := syntax.NewTIDFromTime(t, 0) 144 + for usedTIDs[string(tid)] > 0 { 145 + usedTIDs[string(tid)]++ 146 + tid = syntax.NewTIDFromTime(t, uint(usedTIDs[string(tid)]-1)) 117 147 } 118 - return r1 119 - } 120 - 121 - if r1IsLastFM && !r2IsLastFM { 122 - return r1 148 + usedTIDs[string(tid)]++ 149 + keys[i] = string(tid) 123 150 } 124 - 125 - if !r1IsLastFM && r2IsLastFM { 126 - return r2 127 - } 128 - 129 - return r1 130 - } 131 - 132 - func CreateRecordKey(record PlayRecord) string { 133 - artist := "" 134 - if len(record.Artists) > 0 { 135 - artist = record.Artists[0].ArtistName 136 - } 137 - // Use UTC and RFC3339 (seconds) for robust deduplication across different environments 138 - return fmt.Sprintf("%s|||%s|||%s", 139 - normalizeString(artist), 140 - normalizeString(record.TrackName), 141 - record.PlayedTime.UTC().Format(time.RFC3339), 142 - ) 151 + return keys 143 152 } 144 153 145 154 func MergeRecords(lastfm, spotify []PlayRecord, tolerance time.Duration) ([]PlayRecord, MergeStats) { ··· 148 157 SpotifyTotal: len(spotify), 149 158 } 150 159 151 - recordsMap := make(map[string]NormalizedRecord, len(lastfm)+len(spotify)) 160 + // key is normalizedTrack|normalizedArtist|bucket 161 + recordsMap := make(map[string]PlayRecord) 152 162 153 - processRecords := func(records []PlayRecord, isLastFM bool) { 154 - for _, record := range records { 155 - normalizedTrack := normalizeString(record.TrackName) 156 - normalizedArtist := normalizeString(getArtistName(record)) 163 + process := func(records []PlayRecord) { 164 + for _, rec := range records { 165 + track := rec.NormalizedTrack() 166 + artist := rec.NormalizedArtist() 167 + timestamp := rec.PlayedTime.Time 157 168 158 169 found := false 159 - 160 170 if tolerance > 0 { 161 - toleranceSeconds := int64(tolerance.Seconds()) 162 - bucketKey := record.PlayedTime.Unix() / toleranceSeconds 163 - 164 - for offset := int64(-1); offset <= 1; offset++ { 165 - mapKey := normalizedTrack + "|" + normalizedArtist + "|" + fmt.Sprintf("%d", bucketKey+offset) 166 - if existing, ok := recordsMap[mapKey]; ok { 167 - if abs(existing.timestamp.Sub(record.PlayedTime.Time)) <= tolerance { 168 - better := selectBetterRecord(existing.original, record, existing.isLastFM, isLastFM) 169 - recordsMap[mapKey] = NormalizedRecord{ 170 - original: better, 171 - normalizedTrack: normalizedTrack, 172 - normalizedArtist: normalizedArtist, 173 - timestamp: existing.timestamp, 174 - isLastFM: existing.isLastFM, 171 + bucket := timestamp.Unix() / int64(tolerance.Seconds()) 172 + // Check current and adjacent buckets 173 + for b := bucket - 1; b <= bucket+1; b++ { 174 + key := fmt.Sprintf("%s|%s|%d", track, artist, b) 175 + if existing, ok := recordsMap[key]; ok { 176 + diff := timestamp.Sub(existing.PlayedTime.Time) 177 + if diff < 0 { 178 + diff = -diff 179 + } 180 + if diff <= tolerance { 181 + if rec.BetterThan(existing) { 182 + recordsMap[key] = rec 175 183 } 176 184 stats.DuplicatesRemoved++ 177 185 found = true 178 186 break 179 187 } 188 + } 189 + } 190 + if !found { 191 + key := fmt.Sprintf("%s|%s|%d", track, artist, bucket) 192 + recordsMap[key] = rec 193 + if rec.IsLastFM() { 194 + stats.LastFMUnique++ 195 + } else { 196 + stats.SpotifyUnique++ 180 197 } 181 198 } 182 199 } else { 183 - mapKey := normalizedTrack + "|" + normalizedArtist + "|" + record.PlayedTime.Format(time.RFC3339) 184 - if existing, ok := recordsMap[mapKey]; ok { 185 - better := selectBetterRecord(existing.original, record, existing.isLastFM, isLastFM) 186 - recordsMap[mapKey] = NormalizedRecord{ 187 - original: better, 188 - normalizedTrack: normalizedTrack, 189 - normalizedArtist: normalizedArtist, 190 - timestamp: existing.timestamp, 191 - isLastFM: existing.isLastFM, 200 + key := fmt.Sprintf("%s|%s|%s", track, artist, timestamp.Format(time.RFC3339)) 201 + if existing, ok := recordsMap[key]; ok { 202 + if rec.BetterThan(existing) { 203 + recordsMap[key] = rec 192 204 } 193 205 stats.DuplicatesRemoved++ 194 - found = true 195 - } 196 - } 197 - 198 - if !found { 199 - if tolerance > 0 { 200 - toleranceSeconds := int64(tolerance.Seconds()) 201 - bucketKey := record.PlayedTime.Unix() / toleranceSeconds 202 - mapKey := normalizedTrack + "|" + normalizedArtist + "|" + fmt.Sprintf("%d", bucketKey) 203 - recordsMap[mapKey] = NormalizedRecord{ 204 - original: record, 205 - normalizedTrack: normalizedTrack, 206 - normalizedArtist: normalizedArtist, 207 - timestamp: record.PlayedTime.Time, 208 - isLastFM: isLastFM, 209 - } 210 206 } else { 211 - mapKey := normalizedTrack + "|" + normalizedArtist + "|" + record.PlayedTime.Format(time.RFC3339) 212 - recordsMap[mapKey] = NormalizedRecord{ 213 - original: record, 214 - normalizedTrack: normalizedTrack, 215 - normalizedArtist: normalizedArtist, 216 - timestamp: record.PlayedTime.Time, 217 - isLastFM: isLastFM, 207 + recordsMap[key] = rec 208 + if rec.IsLastFM() { 209 + stats.LastFMUnique++ 210 + } else { 211 + stats.SpotifyUnique++ 218 212 } 219 213 } 220 - if isLastFM { 221 - stats.LastFMUnique++ 222 - } else { 223 - stats.SpotifyUnique++ 224 - } 225 214 } 226 215 } 227 216 } 228 217 229 - processRecords(lastfm, true) 230 - processRecords(spotify, false) 218 + process(lastfm) 219 + process(spotify) 231 220 232 - uniqueRecords := make([]NormalizedRecord, 0, len(recordsMap)) 233 - for _, nr := range recordsMap { 234 - uniqueRecords = append(uniqueRecords, nr) 221 + result := make([]PlayRecord, 0, len(recordsMap)) 222 + for _, rec := range recordsMap { 223 + result = append(result, rec) 235 224 } 236 225 237 - stats.MergedTotal = len(uniqueRecords) 238 - 239 - sort.Slice(uniqueRecords, func(i, j int) bool { 240 - if !uniqueRecords[i].timestamp.Equal(uniqueRecords[j].timestamp) { 241 - return uniqueRecords[i].timestamp.Before(uniqueRecords[j].timestamp) 226 + sort.Slice(result, func(i, j int) bool { 227 + if !result[i].PlayedTime.Equal(result[j].PlayedTime.Time) { 228 + return result[i].PlayedTime.Before(result[j].PlayedTime.Time) 242 229 } 243 - return uniqueRecords[i].normalizedTrack < uniqueRecords[j].normalizedTrack 230 + return result[i].TrackName < result[j].TrackName 244 231 }) 245 232 246 - result := make([]PlayRecord, len(uniqueRecords)) 247 - for i, nr := range uniqueRecords { 248 - result[i] = nr.original 233 + stats.MergedTotal = len(result) 234 + return result, stats 235 + } 236 + 237 + func (r PlayRecord) SameAs(other PlayRecord, tolerance time.Duration) bool { 238 + if r.NormalizedTrack() != other.NormalizedTrack() { 239 + return false 240 + } 241 + if r.NormalizedArtist() != other.NormalizedArtist() { 242 + return false 243 + } 244 + 245 + diff := r.PlayedTime.Sub(other.PlayedTime.Time) 246 + if diff < 0 { 247 + diff = -diff 248 + } 249 + return diff <= tolerance 250 + } 251 + 252 + func FilterNew(records []PlayRecord, existing []ExistingRecord, processed map[string]bool) []PlayRecord { 253 + existingKeys := make(map[string]bool) 254 + for _, rec := range existing { 255 + key := CreateRecordKey(rec.Value) 256 + if key == "|||" { 257 + continue 258 + } 259 + existingKeys[key] = true 249 260 } 250 261 251 - return result, stats 262 + var newRecords []PlayRecord 263 + for _, record := range records { 264 + key := CreateRecordKey(record) 265 + if !existingKeys[key] && !processed[key] { 266 + newRecords = append(newRecords, record) 267 + } 268 + } 269 + return newRecords 252 270 } 253 271 254 - func abs[T ~int64](n T) T { 255 - return max(n, -n) 272 + func FindDuplicates(records []ExistingRecord) map[string][]ExistingRecord { 273 + groups := make(map[string][]ExistingRecord) 274 + for _, rec := range records { 275 + key := CreateRecordKey(rec.Value) 276 + if key == "|||" { 277 + continue 278 + } 279 + groups[key] = append(groups[key], rec) 280 + } 281 + 282 + duplicates := make(map[string][]ExistingRecord) 283 + for key, group := range groups { 284 + if len(group) >= 2 { 285 + duplicates[key] = group 286 + } 287 + } 288 + return duplicates 256 289 }
+38 -68
sync/record_test.go
··· 12 12 expected string 13 13 }{ 14 14 { 15 - name: "basic lowercase", 15 + name: "basic TID", 16 16 record: PlayRecord{ 17 17 TrackName: "Test Track", 18 18 Artists: []PlayRecordArtist{{ArtistName: "Test Artist"}}, 19 19 PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 20 20 }, 21 - expected: "testartist|||testtrack|||2024-01-15T10:30:00Z", 22 - }, 23 - { 24 - name: "uppercase converted to lowercase", 25 - record: PlayRecord{ 26 - TrackName: "THERE IS A LIGHT THAT NEVER GOES OUT", 27 - Artists: []PlayRecordArtist{{ArtistName: "THE SMITHS"}}, 28 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 29 - }, 30 - expected: "thesmiths|||thereisalightthatnevergoesout|||2024-01-15T10:30:00Z", 31 - }, 32 - { 33 - name: "punctuation removed", 34 - record: PlayRecord{ 35 - TrackName: "Don't Stop Me Now!", 36 - Artists: []PlayRecordArtist{{ArtistName: "Queen"}}, 37 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 38 - }, 39 - expected: "queen|||dontstopmenow|||2024-01-15T10:30:00Z", 40 - }, 41 - { 42 - name: "special characters removed", 43 - record: PlayRecord{ 44 - TrackName: "Rock & Roll (Part 2)", 45 - Artists: []PlayRecordArtist{{ArtistName: "Gary Glitter"}}, 46 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 47 - }, 48 - expected: "garyglitter|||rockrollpart2|||2024-01-15T10:30:00Z", 49 - }, 50 - { 51 - name: "empty artist defaults to empty string", 52 - record: PlayRecord{ 53 - TrackName: "Unknown Track", 54 - Artists: []PlayRecordArtist{}, 55 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 56 - }, 57 - expected: "|||unknowntrack|||2024-01-15T10:30:00Z", 58 - }, 59 - { 60 - name: "multiple artists only uses first", 61 - record: PlayRecord{ 62 - TrackName: "Song", 63 - Artists: []PlayRecordArtist{{ArtistName: "First Artist"}, {ArtistName: "Second Artist"}}, 64 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 65 - }, 66 - expected: "firstartist|||song|||2024-01-15T10:30:00Z", 67 - }, 68 - { 69 - name: "whitespace normalized", 70 - record: PlayRecord{ 71 - TrackName: "Song With Lots Of Spaces", 72 - Artists: []PlayRecordArtist{{ArtistName: " Artist With Spaces "}}, 73 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 74 - }, 75 - expected: "artistwithspaces|||songwithlotsofspaces|||2024-01-15T10:30:00Z", 76 - }, 77 - { 78 - name: "diacritics normalized", 79 - record: PlayRecord{ 80 - TrackName: "Café", 81 - Artists: []PlayRecordArtist{{ArtistName: "Naïve"}}, 82 - PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 83 - }, 84 - expected: "naive|||cafe|||2024-01-15T10:30:00Z", 21 + expected: "3kiz7zjhak222", 85 22 }, 86 23 } 87 24 ··· 92 29 t.Errorf("CreateRecordKey() = %q, want %q", result, tt.expected) 93 30 } 94 31 }) 32 + } 33 + } 34 + 35 + func TestCreateRecordKeys(t *testing.T) { 36 + baseTime := time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC) 37 + records := []PlayRecord{ 38 + {TrackName: "A", PlayedTime: Timestamp{Time: baseTime}}, 39 + {TrackName: "B", PlayedTime: Timestamp{Time: baseTime}}, 40 + {TrackName: "C", PlayedTime: Timestamp{Time: baseTime.Add(time.Second)}}, 41 + } 42 + 43 + keys := CreateRecordKeys(records) 44 + if len(keys) != 3 { 45 + t.Fatalf("expected 3 keys, got %d", len(keys)) 46 + } 47 + 48 + if keys[0] == keys[1] { 49 + t.Errorf("expected unique keys for same timestamp, got duplicate %q", keys[0]) 50 + } 51 + 52 + if keys[0] >= keys[1] { 53 + t.Errorf("expected keys to be sortable, got %q >= %q", keys[0], keys[1]) 54 + } 55 + 56 + if keys[1] >= keys[2] { 57 + t.Errorf("expected keys to be sortable by time, got %q >= %q", keys[1], keys[2]) 95 58 } 96 59 } 97 60 ··· 205 168 206 169 for _, tt := range tests { 207 170 t.Run(tt.name, func(t *testing.T) { 208 - result := selectBetterRecord(tt.r1, tt.r2, tt.r1IsLastFM, tt.r2IsLastFM) 209 - if result.MusicServiceBaseDomain != tt.expectedService { 210 - t.Errorf("selectBetterRecord() = %q, want %q", result.MusicServiceBaseDomain, tt.expectedService) 171 + result := tt.r1.BetterThan(tt.r2) 172 + var resultService string 173 + if result { 174 + resultService = tt.r1.MusicServiceBaseDomain 175 + } else { 176 + resultService = tt.r2.MusicServiceBaseDomain 177 + } 178 + 179 + if resultService != tt.expectedService { 180 + t.Errorf("BetterThan() result service = %q, want %q", resultService, tt.expectedService) 211 181 } 212 182 }) 213 183 }