like malachite (atproto-lastfm-importer) but in go and bluer
go spotify tealfm lastfm atproto
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

initial commit

karitham 0f28bae4

+5990
+2
.gitignore
··· 1 + lazuli 2 + result
+19
LICENSE
··· 1 + MIT License 2 + 3 + Permission is hereby granted, free of charge, to any person obtaining a copy 4 + of this software and associated documentation files (the "Software"), to deal 5 + in the Software without restriction, including without limitation the rights 6 + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 + copies of the Software, and to permit persons to whom the Software is 8 + furnished to do so, subject to the following conditions: 9 + 10 + The above copyright notice and this permission notice shall be included in all 11 + copies or substantial portions of the Software. 12 + 13 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 + SOFTWARE.
+103
README.md
··· 1 + # Lazuli 2 + 3 + Import Last.fm and Spotify listening history to teal.fm. 4 + 5 + ## Overview 6 + 7 + Lazuli is a command-line tool that parses listening history exports from Last.fm and Spotify, merges them to remove duplicates, and publishes them to teal.fm as `fm.teal.alpha.feed.play` records. 8 + 9 + Re-written from <https://tangled.org/ewancroft.uk/atproto-lastfm-importer>. 10 + 11 + ## Usage 12 + 13 + ### Authentication 14 + 15 + Set your Bluesky credentials via environment variables or flags: 16 + 17 + ```sh 18 + export LAZULI_HANDLE="your-handle.bsky.social" 19 + export LAZULI_PASSWORD="your-app-password" 20 + ``` 21 + 22 + ### Commands 23 + 24 + #### Export 25 + 26 + Parse and merge Last.fm/Spotify exports, output as JSON: 27 + 28 + ```sh 29 + lazuli export --lastfm=history.csv --spotify=streaming_history.json --output=merged.json 30 + ``` 31 + 32 + #### Import 33 + 34 + Import listening history to Bluesky with proper rate limiting: 35 + 36 + ```sh 37 + lazuli import --lastfm=history.csv --spotify=streaming_history.json 38 + ``` 39 + 40 + Resume is automatic - if interrupted, re-running skips already-imported records: 41 + 42 + Import modes: 43 + 44 + - `lastfm` - Import only Last.fm data 45 + - `spotify` - Import only Spotify data 46 + - `combined` - Merge both sources (default) 47 + 48 + #### Sync 49 + 50 + Fetch existing records and show statistics: 51 + 52 + ```sh 53 + lazuli sync 54 + ``` 55 + 56 + #### Dedupe 57 + 58 + Find and remove duplicate records from your Bluesky profile: 59 + 60 + ```sh 61 + lazuli dedupe --dry-run # Preview without deleting 62 + ``` 63 + 64 + ## Environment Variables 65 + 66 + | Variable | Description | 67 + | ----------------- | -------------------------------------------- | 68 + | `LAZULI_HANDLE` | Bluesky handle (e.g., `user.bsky.social`) | 69 + | `LAZULI_PASSWORD` | Bluesky app password | 70 + | `LAZULI_LASTFM` | Path to Last.fm CSV file | 71 + | `LAZULI_SPOTIFY` | Path to Spotify JSON file/directory/zip | 72 + | 
`LAZULI_MODE` | Import mode: `lastfm`, `spotify`, `combined` | 73 + | `LAZULI_DRY_RUN` | Preview without publishing | 74 + | `LAZULI_VERBOSE` | Enable verbose logging | 75 + | `LAZULI_REVERSE` | Process records in reverse order | 76 + 77 + ## Input Formats 78 + 79 + ### Last.fm 80 + 81 + Export your listening history from Last.fm. The CSV file should have columns: 82 + 83 + - UTC timestamp 84 + - Artist name 85 + - Album name 86 + - Track name 87 + - MusicBrainz IDs (optional) 88 + 89 + ### Spotify 90 + 91 + Download your extended streaming history from Spotify (Privacy settings). Lazuli accepts: 92 + 93 + - Single JSON files 94 + - Directories containing `Streaming_History_Audio_*.json` files 95 + - ZIP archives of the above 96 + 97 + ## Features 98 + 99 + - **Cross-source deduplication**: Merges Last.fm and Spotify data, removing duplicates within a configurable time tolerance 100 + - **Rate limiting**: Respects ATProto rate limits with configurable batch sizes and delays 101 + - **Automatic resume**: Cache tracks imported records - re-running skips already-imported entries 102 + - **Dry-run mode**: Preview imports without publishing 103 + - **Cache management**: Caches teal records for faster subsequent operations
+405
cache/bbolt.go
··· 1 + package cache 2 + 3 + import ( 4 + "encoding/json" 5 + "errors" 6 + "fmt" 7 + "os" 8 + "path/filepath" 9 + "strings" 10 + "time" 11 + 12 + "go.etcd.io/bbolt" 13 + ) 14 + 15 + var _ Storage = (*BoltStorage)(nil) 16 + 17 + var ErrCacheNotFound = errors.New("cache not found") 18 + 19 + type BoltStorage struct { 20 + db *bbolt.DB 21 + path string 22 + } 23 + 24 + func NewBoltStorage() (*BoltStorage, error) { 25 + dir, err := cacheDir() 26 + if err != nil { 27 + return nil, err 28 + } 29 + if err := os.MkdirAll(dir, 0o755); err != nil { 30 + return nil, err 31 + } 32 + db, err := bbolt.Open(filepath.Join(dir, CacheFile), 0o644, &bbolt.Options{ 33 + Timeout: time.Second, 34 + }) 35 + if err != nil { 36 + return nil, err 37 + } 38 + return &BoltStorage{db: db, path: dir}, nil 39 + } 40 + 41 + func (s *BoltStorage) Close() error { 42 + if s.db == nil { 43 + return nil 44 + } 45 + return s.db.Close() 46 + } 47 + 48 + func (s *BoltStorage) SaveRecords(did string, records map[string][]byte) error { 49 + if did == "" { 50 + return errors.New("did cannot be empty") 51 + } 52 + 53 + return s.db.Update(func(tx *bbolt.Tx) error { 54 + b, err := tx.CreateBucketIfNotExists([]byte(recordsBucket(did))) 55 + if err != nil { 56 + return err 57 + } 58 + 59 + for key, data := range records { 60 + if err := b.Put([]byte(key), data); err != nil { 61 + return err 62 + } 63 + } 64 + return s.setTimestamp(tx, did) 65 + }) 66 + } 67 + 68 + func (s *BoltStorage) IterateUnpublished(did string, fn func(key string, rec []byte) error) error { 69 + published, err := s.GetPublished(did) 70 + if err != nil { 71 + return err 72 + } 73 + 74 + return s.db.View(func(tx *bbolt.Tx) error { 75 + b := tx.Bucket([]byte(recordsBucket(did))) 76 + if b == nil { 77 + return nil 78 + } 79 + 80 + return b.ForEach(func(k, v []byte) error { 81 + key := string(k) 82 + if published[key] { 83 + return nil 84 + } 85 + return fn(key, v) 86 + }) 87 + }) 88 + } 89 + 90 + func (s *BoltStorage) IteratePublished(did 
string, fn func(key string, rec []byte) error) error { 91 + published, err := s.GetPublished(did) 92 + if err != nil { 93 + return err 94 + } 95 + 96 + return s.db.View(func(tx *bbolt.Tx) error { 97 + b := tx.Bucket([]byte(recordsBucket(did))) 98 + if b == nil { 99 + return nil 100 + } 101 + 102 + return b.ForEach(func(k, v []byte) error { 103 + key := string(k) 104 + if !published[key] { 105 + return nil 106 + } 107 + return fn(key, v) 108 + }) 109 + }) 110 + } 111 + 112 + func (s *BoltStorage) IterateFailed(did string, fn func(key string, rec []byte, errMsg string) error) error { 113 + return s.db.View(func(tx *bbolt.Tx) error { 114 + fb := tx.Bucket([]byte(failedBucket(did))) 115 + if fb == nil { 116 + return nil 117 + } 118 + rb := tx.Bucket([]byte(recordsBucket(did))) 119 + if rb == nil { 120 + return nil 121 + } 122 + 123 + return fb.ForEach(func(k, v []byte) error { 124 + key := string(k) 125 + errMsg := string(v) 126 + rec := rb.Get(k) 127 + return fn(key, rec, errMsg) 128 + }) 129 + }) 130 + } 131 + 132 + func (s *BoltStorage) MarkPublished(did string, keys ...string) error { 133 + return s.db.Update(func(tx *bbolt.Tx) error { 134 + b, err := tx.CreateBucketIfNotExists([]byte(processedBucket(did))) 135 + if err != nil { 136 + return err 137 + } 138 + for _, k := range keys { 139 + if err := b.Put([]byte(k), []byte{1}); err != nil { 140 + return err 141 + } 142 + } 143 + return nil 144 + }) 145 + } 146 + 147 + func (s *BoltStorage) MarkFailed(did string, keys []string, errMsg string) error { 148 + return s.db.Update(func(tx *bbolt.Tx) error { 149 + b, err := tx.CreateBucketIfNotExists([]byte(failedBucket(did))) 150 + if err != nil { 151 + return err 152 + } 153 + // Also mark as processed so we don't try again 154 + pb, err := tx.CreateBucketIfNotExists([]byte(processedBucket(did))) 155 + if err != nil { 156 + return err 157 + } 158 + 159 + for _, k := range keys { 160 + if err := b.Put([]byte(k), []byte(errMsg)); err != nil { 161 + return err 162 + } 163 + 
if err := pb.Put([]byte(k), []byte{0}); err != nil { // 0 for failed 164 + return err 165 + } 166 + } 167 + return nil 168 + }) 169 + } 170 + 171 + func (s *BoltStorage) RemoveFailed(did string, keys ...string) error { 172 + return s.db.Update(func(tx *bbolt.Tx) error { 173 + b := tx.Bucket([]byte(failedBucket(did))) 174 + if b == nil { 175 + return nil 176 + } 177 + for _, k := range keys { 178 + if err := b.Delete([]byte(k)); err != nil { 179 + return err 180 + } 181 + } 182 + return nil 183 + }) 184 + } 185 + 186 + func (s *BoltStorage) GetPublished(did string) (map[string]bool, error) { 187 + res := make(map[string]bool) 188 + err := s.db.View(func(tx *bbolt.Tx) error { 189 + b := tx.Bucket([]byte(processedBucket(did))) 190 + if b == nil { 191 + return nil 192 + } 193 + return b.ForEach(func(k, v []byte) error { 194 + res[string(k)] = true 195 + return nil 196 + }) 197 + }) 198 + return res, err 199 + } 200 + 201 + func (s *BoltStorage) IsValid(did string) bool { 202 + if did == "" { 203 + return false 204 + } 205 + ts, err := s.Timestamp(did) 206 + if err != nil { 207 + return false 208 + } 209 + return time.Since(ts) < CacheTTL 210 + } 211 + 212 + func (s *BoltStorage) Timestamp(did string) (time.Time, error) { 213 + var ts time.Time 214 + err := s.db.View(func(tx *bbolt.Tx) error { 215 + metaBkt := tx.Bucket([]byte(metaBucket())) 216 + if metaBkt == nil { 217 + return ErrCacheNotFound 218 + } 219 + data := metaBkt.Get([]byte(metaPrefixTimestamp + did)) 220 + if data == nil { 221 + return ErrCacheNotFound 222 + } 223 + return json.Unmarshal(data, &ts) 224 + }) 225 + return ts, err 226 + } 227 + 228 + func (s *BoltStorage) setTimestamp(tx *bbolt.Tx, did string) error { 229 + metaBkt, err := tx.CreateBucketIfNotExists([]byte(metaBucket())) 230 + if err != nil { 231 + return fmt.Errorf("create meta bucket: %w", err) 232 + } 233 + ts := time.Now() 234 + data, err := json.Marshal(ts) 235 + if err != nil { 236 + return fmt.Errorf("marshal timestamp: %w", err) 237 + 
} 238 + return metaBkt.Put([]byte(metaPrefixTimestamp+did), data) 239 + } 240 + 241 + func (s *BoltStorage) Clear(did string) error { 242 + if did == "" { 243 + return errors.New("did cannot be empty") 244 + } 245 + 246 + return s.db.Update(func(tx *bbolt.Tx) error { 247 + if err := tx.DeleteBucket([]byte(recordsBucket(did))); err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) { 248 + return fmt.Errorf("delete records bucket: %w", err) 249 + } 250 + if err := tx.DeleteBucket([]byte(processedBucket(did))); err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) { 251 + return fmt.Errorf("delete processed bucket: %w", err) 252 + } 253 + if err := tx.DeleteBucket([]byte(failedBucket(did))); err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) { 254 + return fmt.Errorf("delete failed bucket: %w", err) 255 + } 256 + metaBkt := tx.Bucket([]byte(metaBucket())) 257 + if metaBkt != nil { 258 + if err := metaBkt.Delete([]byte(metaPrefixTimestamp + did)); err != nil { 259 + return fmt.Errorf("delete timestamp: %w", err) 260 + } 261 + } 262 + return nil 263 + }) 264 + } 265 + 266 + func (s *BoltStorage) ClearAll() error { 267 + return s.db.Update(func(tx *bbolt.Tx) error { 268 + var dids []string 269 + tx.ForEach(func(name []byte, b *bbolt.Bucket) error { 270 + if b == nil { 271 + return nil 272 + } 273 + bucketName := string(name) 274 + if after, ok := strings.CutPrefix(bucketName, "records:"); ok { 275 + dids = append(dids, after) 276 + } 277 + return nil 278 + }) 279 + 280 + for _, did := range dids { 281 + if err := tx.DeleteBucket([]byte(recordsBucket(did))); err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) { 282 + return fmt.Errorf("delete records for %s: %w", did, err) 283 + } 284 + if err := tx.DeleteBucket([]byte(processedBucket(did))); err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) { 285 + return fmt.Errorf("delete processed for %s: %w", did, err) 286 + } 287 + if err := tx.DeleteBucket([]byte(failedBucket(did))); err != nil && !errors.Is(err, 
bbolt.ErrBucketNotFound) { 288 + return fmt.Errorf("delete failed for %s: %w", did, err) 289 + } 290 + } 291 + 292 + if err := tx.DeleteBucket([]byte(metaBucket())); err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) { 293 + return fmt.Errorf("delete meta: %w", err) 294 + } 295 + 296 + if err := tx.DeleteBucket([]byte("quota")); err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) { 297 + return fmt.Errorf("delete quota: %w", err) 298 + } 299 + 300 + return nil 301 + }) 302 + } 303 + 304 + func (s *BoltStorage) Get(key string) (int, error) { 305 + var val int 306 + err := s.db.View(func(tx *bbolt.Tx) error { 307 + b := tx.Bucket([]byte("quota")) 308 + if b == nil { 309 + return nil 310 + } 311 + v := b.Get([]byte(key)) 312 + if v == nil { 313 + return nil 314 + } 315 + return json.Unmarshal(v, &val) 316 + }) 317 + return val, err 318 + } 319 + 320 + func (s *BoltStorage) Set(key string, val int) error { 321 + return s.db.Update(func(tx *bbolt.Tx) error { 322 + b, err := tx.CreateBucketIfNotExists([]byte("quota")) 323 + if err != nil { 324 + return err 325 + } 326 + v, err := json.Marshal(val) 327 + if err != nil { 328 + return err 329 + } 330 + return b.Put([]byte(key), v) 331 + }) 332 + } 333 + 334 + func (s *BoltStorage) Stats() (DBStats, error) { 335 + var stats DBStats 336 + stats.UserStats = make(map[string]any) 337 + 338 + err := s.db.View(func(tx *bbolt.Tx) error { 339 + return tx.ForEach(func(name []byte, b *bbolt.Bucket) error { 340 + bucketName := string(name) 341 + 342 + if strings.HasPrefix(bucketName, "records:") { 343 + did := strings.TrimPrefix(bucketName, "records:") 344 + count := 0 345 + b.ForEach(func(k, v []byte) error { 346 + count++ 347 + return nil 348 + }) 349 + stats.TotalRecords += count 350 + 351 + userStat := stats.UserStats[did] 352 + if userStat == nil { 353 + userStat = make(map[string]int) 354 + } 355 + m := userStat.(map[string]int) 356 + m["total"] = count 357 + stats.UserStats[did] = m 358 + } 359 + 360 + if 
strings.HasPrefix(bucketName, "processed:") { 361 + did := strings.TrimPrefix(bucketName, "processed:") 362 + count := 0 363 + b.ForEach(func(k, v []byte) error { 364 + count++ 365 + return nil 366 + }) 367 + stats.MarkedPublished += count 368 + 369 + userStat := stats.UserStats[did] 370 + if userStat == nil { 371 + userStat = make(map[string]int) 372 + } 373 + m := userStat.(map[string]int) 374 + m["published"] = count 375 + stats.UserStats[did] = m 376 + } 377 + 378 + if strings.HasPrefix(bucketName, "failed:") { 379 + did := strings.TrimPrefix(bucketName, "failed:") 380 + count := 0 381 + b.ForEach(func(k, v []byte) error { 382 + count++ 383 + return nil 384 + }) 385 + stats.FailedCount += count 386 + 387 + userStat := stats.UserStats[did] 388 + if userStat == nil { 389 + userStat = make(map[string]int) 390 + } 391 + m := userStat.(map[string]int) 392 + m["failed"] = count 393 + stats.UserStats[did] = m 394 + } 395 + 396 + return nil 397 + }) 398 + }) 399 + if err != nil { 400 + return stats, err 401 + } 402 + 403 + stats.UnpublishedCount = stats.TotalRecords - stats.MarkedPublished 404 + return stats, nil 405 + }
+60
cache/cache.go
··· 1 + package cache 2 + 3 + import ( 4 + "os" 5 + "path/filepath" 6 + "strings" 7 + "time" 8 + ) 9 + 10 + const ( 11 + CacheFile = "cache.db" 12 + CacheTTL = 24 * time.Hour 13 + ) 14 + 15 + type Entry[T any] struct { 16 + URI string `json:"uri"` 17 + CID string `json:"cid"` 18 + Value T `json:"value"` 19 + } 20 + 21 + func recordsBucket(did string) string { 22 + return "records:" + did 23 + } 24 + 25 + func processedBucket(did string) string { 26 + return "processed:" + did 27 + } 28 + 29 + func failedBucket(did string) string { 30 + return "failed:" + did 31 + } 32 + 33 + func metaBucket() string { 34 + return "_meta" 35 + } 36 + 37 + const metaPrefixTimestamp = "ts:" 38 + 39 + func cacheDir() (string, error) { 40 + if dir := os.Getenv("LAZULI_CACHE_DIR"); dir != "" { 41 + return dir, nil 42 + } 43 + 44 + for _, arg := range os.Args { 45 + if strings.HasPrefix(arg, "-test.") { 46 + dir, err := os.MkdirTemp("", "lazuli-test-*") 47 + if err != nil { 48 + return "", err 49 + } 50 + os.Setenv("LAZULI_CACHE_DIR", dir) 51 + return dir, nil 52 + } 53 + } 54 + 55 + cacheDir, err := os.UserCacheDir() 56 + if err != nil { 57 + return "", err 58 + } 59 + return filepath.Join(cacheDir, "lazuli"), nil 60 + }
+101
cache/cache_test.go
··· 1 + package cache 2 + 3 + import ( 4 + "testing" 5 + ) 6 + 7 + func newTestStorage(t *testing.T) *BoltStorage { 8 + t.Helper() 9 + storage, err := NewBoltStorage() 10 + if err != nil { 11 + t.Fatalf("NewBoltStorage failed: %v", err) 12 + } 13 + t.Cleanup(func() { storage.Close() }) 14 + return storage 15 + } 16 + 17 + func TestSaveIterateRoundtrip(t *testing.T) { 18 + storage := newTestStorage(t) 19 + did := "did:plc:test" 20 + 21 + records := map[string][]byte{ 22 + "key1": []byte(`{"trackName":"track1"}`), 23 + "key2": []byte(`{"trackName":"track2"}`), 24 + } 25 + 26 + if err := storage.SaveRecords(did, records); err != nil { 27 + t.Fatalf("SaveRecords failed: %v", err) 28 + } 29 + 30 + count := 0 31 + err := storage.IterateUnpublished(did, func(key string, data []byte) error { 32 + count++ 33 + return nil 34 + }) 35 + if err != nil { 36 + t.Fatalf("IterateUnpublished failed: %v", err) 37 + } 38 + if count != 2 { 39 + t.Errorf("expected 2 records, got %d", count) 40 + } 41 + } 42 + 43 + func TestMarkPublished(t *testing.T) { 44 + storage := newTestStorage(t) 45 + did := "did:plc:test" 46 + 47 + records := map[string][]byte{ 48 + "key1": []byte(`{"trackName":"track1"}`), 49 + "key2": []byte(`{"trackName":"track2"}`), 50 + } 51 + storage.SaveRecords(did, records) 52 + 53 + if err := storage.MarkPublished(did, "key1"); err != nil { 54 + t.Fatalf("MarkPublished failed: %v", err) 55 + } 56 + 57 + count := 0 58 + storage.IterateUnpublished(did, func(key string, data []byte) error { 59 + if key == "key1" { 60 + t.Error("key1 should have been filtered out") 61 + } 62 + count++ 63 + return nil 64 + }) 65 + if count != 1 { 66 + t.Errorf("expected 1 unpublished record, got %d", count) 67 + } 68 + } 69 + 70 + func TestClear(t *testing.T) { 71 + storage := newTestStorage(t) 72 + did := "did:plc:test" 73 + storage.SaveRecords(did, map[string][]byte{"key1": []byte(`{}`)}) 74 + 75 + if !storage.IsValid(did) { 76 + t.Error("cache should be valid") 77 + } 78 + 79 + 
storage.Clear(did) 80 + 81 + if storage.IsValid(did) { 82 + t.Error("cache should be invalid") 83 + } 84 + } 85 + 86 + func TestQuotaKV(t *testing.T) { 87 + storage := newTestStorage(t) 88 + 89 + err := storage.Set("testkey", 123) 90 + if err != nil { 91 + t.Fatalf("Set failed: %v", err) 92 + } 93 + 94 + val, err := storage.Get("testkey") 95 + if err != nil { 96 + t.Fatalf("Get failed: %v", err) 97 + } 98 + if val != 123 { 99 + t.Errorf("expected 123, got %d", val) 100 + } 101 + }
+38
cache/storage.go
··· 1 + package cache 2 + 3 + import ( 4 + "time" 5 + ) 6 + 7 + type Storage interface { 8 + SaveRecords(did string, records map[string][]byte) error 9 + IterateUnpublished(did string, fn func(key string, rec []byte) error) error 10 + IteratePublished(did string, fn func(key string, rec []byte) error) error 11 + IterateFailed(did string, fn func(key string, rec []byte, errMsg string) error) error 12 + MarkPublished(did string, keys ...string) error 13 + MarkFailed(did string, keys []string, err string) error 14 + RemoveFailed(did string, keys ...string) error 15 + GetPublished(did string) (map[string]bool, error) 16 + 17 + IsValid(did string) bool 18 + Timestamp(did string) (time.Time, error) 19 + 20 + Clear(did string) error 21 + ClearAll() error 22 + Close() error 23 + 24 + // Stats returns database statistics 25 + Stats() (DBStats, error) 26 + 27 + // KVStore implementation 28 + Get(key string) (int, error) 29 + Set(key string, val int) error 30 + } 31 + 32 + type DBStats struct { 33 + TotalRecords int `json:"totalRecords"` 34 + MarkedPublished int `json:"markedPublished"` 35 + FailedCount int `json:"failedCount"` 36 + UnpublishedCount int `json:"unpublishedCount"` 37 + UserStats map[string]any `json:"userStats"` 38 + }
+173
flags.go
··· 1 + package main 2 + 3 + import ( 4 + "time" 5 + 6 + "github.com/urfave/cli/v3" 7 + 8 + "tangled.org/karitham.dev/lazuli/sync" 9 + ) 10 + 11 + const ( 12 + DefaultBatchSize = 20 13 + DefaultBatchDelay = 2000 * time.Millisecond 14 + ) 15 + 16 + const ( 17 + EnvHandle = "LAZULI_HANDLE" 18 + EnvPassword = "LAZULI_PASSWORD" 19 + EnvVerbose = "LAZULI_VERBOSE" 20 + EnvQuiet = "LAZULI_QUIET" 21 + EnvReverse = "LAZULI_REVERSE" 22 + EnvDryRun = "LAZULI_DRY_RUN" 23 + EnvFresh = "LAZULI_FRESH" 24 + EnvClearCache = "LAZULI_CLEAR_CACHE" 25 + EnvYes = "LAZULI_YES" 26 + ) 27 + 28 + var lastfmFlag = &cli.StringFlag{ 29 + Name: "lastfm", 30 + Usage: "Path to Last.fm CSV file or directory", 31 + Sources: cli.EnvVars("LAZULI_LASTFM"), 32 + } 33 + 34 + var spotifyFlag = &cli.StringFlag{ 35 + Name: "spotify", 36 + Usage: "Path to Spotify JSON/directory/zip", 37 + Sources: cli.EnvVars("LAZULI_SPOTIFY"), 38 + } 39 + 40 + var ( 41 + verboseCount int 42 + quietCount int 43 + ) 44 + 45 + var commonFlags = []cli.Flag{ 46 + &cli.StringFlag{ 47 + Name: "handle", 48 + Usage: "Bluesky handle", 49 + Sources: cli.EnvVars(EnvHandle), 50 + }, 51 + &cli.StringFlag{ 52 + Name: "password", 53 + Usage: "App password", 54 + Sources: cli.EnvVars(EnvPassword), 55 + }, 56 + &cli.BoolFlag{ 57 + Name: "verbose", 58 + Usage: "Enable verbose logging (-v for debug, -vv for trace)", 59 + Aliases: []string{"v"}, 60 + Sources: cli.EnvVars(EnvVerbose), 61 + Config: cli.BoolConfig{Count: &verboseCount}, 62 + }, 63 + &cli.BoolFlag{ 64 + Name: "quiet", 65 + Usage: "Suppress non-essential output (-q for warn, -qq for errors, -qqq for silent)", 66 + Aliases: []string{"q"}, 67 + Sources: cli.EnvVars(EnvQuiet), 68 + Config: cli.BoolConfig{Count: &quietCount}, 69 + }, 70 + &cli.StringFlag{ 71 + Name: "output-format", 72 + Usage: "Output format: text or json", 73 + Value: "text", 74 + Sources: cli.EnvVars("LAZULI_OUTPUT_FORMAT"), 75 + }, 76 + } 77 + 78 + var exportFlags = []cli.Flag{ 79 + lastfmFlag, 80 + spotifyFlag, 81 
+ &cli.StringFlag{ 82 + Name: "output", 83 + Usage: "Output file (stdout if not set)", 84 + Sources: cli.EnvVars("LAZULI_OUTPUT"), 85 + }, 86 + &cli.BoolFlag{ 87 + Name: "reverse", 88 + Usage: "Sort records reverse chronologically", 89 + Sources: cli.EnvVars(EnvReverse), 90 + }, 91 + &cli.DurationFlag{ 92 + Name: "tolerance", 93 + Usage: "Time tolerance for cross-source deduplication (e.g., 5m, 10m)", 94 + Value: sync.DefaultCrossSourceTolerance, 95 + Sources: cli.EnvVars("LAZULI_TOLERANCE"), 96 + }, 97 + } 98 + 99 + var importFlags = []cli.Flag{ 100 + lastfmFlag, 101 + spotifyFlag, 102 + &cli.StringFlag{ 103 + Name: "mode", 104 + Usage: "Import mode: lastfm, spotify, combined (default: combined)", 105 + Value: "combined", 106 + Sources: cli.EnvVars("LAZULI_MODE"), 107 + }, 108 + &cli.BoolFlag{ 109 + Name: "dry-run", 110 + Usage: "Preview without publishing", 111 + Sources: cli.EnvVars(EnvDryRun), 112 + }, 113 + &cli.BoolFlag{ 114 + Name: "reverse", 115 + Usage: "Import in reverse order", 116 + Sources: cli.EnvVars(EnvReverse), 117 + }, 118 + &cli.BoolFlag{ 119 + Name: "fresh", 120 + Usage: "Don't use cached Bluesky records", 121 + Sources: cli.EnvVars(EnvFresh), 122 + }, 123 + &cli.BoolFlag{ 124 + Name: "clear-cache", 125 + Usage: "Clear cache before running", 126 + Sources: cli.EnvVars(EnvClearCache), 127 + }, 128 + &cli.IntFlag{ 129 + Name: "batch-size", 130 + Usage: "Records per batch (default: 20)", 131 + Value: DefaultBatchSize, 132 + Sources: cli.EnvVars("LAZULI_BATCH_SIZE"), 133 + }, 134 + &cli.IntFlag{ 135 + Name: "batch-delay", 136 + Usage: "MS between batches (default: 2000)", 137 + Value: int(DefaultBatchDelay.Milliseconds()), 138 + Sources: cli.EnvVars("LAZULI_BATCH_DELAY"), 139 + }, 140 + &cli.DurationFlag{ 141 + Name: "tolerance", 142 + Usage: "Time tolerance for cross-source deduplication (e.g., 5m, 10m)", 143 + Value: sync.DefaultCrossSourceTolerance, 144 + Sources: cli.EnvVars("LAZULI_TOLERANCE"), 145 + }, 146 + } 147 + 148 + var syncFlags = 
[]cli.Flag{ 149 + &cli.BoolFlag{ 150 + Name: "fresh", 151 + Usage: "Force refresh cache", 152 + Sources: cli.EnvVars(EnvFresh), 153 + }, 154 + } 155 + 156 + var dedupeFlags = []cli.Flag{ 157 + &cli.BoolFlag{ 158 + Name: "dry-run", 159 + Usage: "Preview without deleting", 160 + Sources: cli.EnvVars(EnvDryRun), 161 + }, 162 + &cli.BoolFlag{ 163 + Name: "fresh", 164 + Usage: "Force refresh cache", 165 + Sources: cli.EnvVars(EnvFresh), 166 + }, 167 + &cli.BoolFlag{ 168 + Name: "yes", 169 + Usage: "Skip confirmation prompt", 170 + Aliases: []string{"y"}, 171 + Sources: cli.EnvVars(EnvYes), 172 + }, 173 + }
+61
flake.lock
··· 1 + { 2 + "nodes": { 3 + "flake-parts": { 4 + "inputs": { 5 + "nixpkgs-lib": "nixpkgs-lib" 6 + }, 7 + "locked": { 8 + "lastModified": 1768135262, 9 + "narHash": "sha256-PVvu7OqHBGWN16zSi6tEmPwwHQ4rLPU9Plvs8/1TUBY=", 10 + "owner": "hercules-ci", 11 + "repo": "flake-parts", 12 + "rev": "80daad04eddbbf5a4d883996a73f3f542fa437ac", 13 + "type": "github" 14 + }, 15 + "original": { 16 + "owner": "hercules-ci", 17 + "repo": "flake-parts", 18 + "type": "github" 19 + } 20 + }, 21 + "nixpkgs": { 22 + "locked": { 23 + "lastModified": 1768564909, 24 + "narHash": "sha256-Kell/SpJYVkHWMvnhqJz/8DqQg2b6PguxVWOuadbHCc=", 25 + "owner": "NixOS", 26 + "repo": "nixpkgs", 27 + "rev": "e4bae1bd10c9c57b2cf517953ab70060a828ee6f", 28 + "type": "github" 29 + }, 30 + "original": { 31 + "owner": "NixOS", 32 + "ref": "nixos-unstable", 33 + "repo": "nixpkgs", 34 + "type": "github" 35 + } 36 + }, 37 + "nixpkgs-lib": { 38 + "locked": { 39 + "lastModified": 1765674936, 40 + "narHash": "sha256-k00uTP4JNfmejrCLJOwdObYC9jHRrr/5M/a/8L2EIdo=", 41 + "owner": "nix-community", 42 + "repo": "nixpkgs.lib", 43 + "rev": "2075416fcb47225d9b68ac469a5c4801a9c4dd85", 44 + "type": "github" 45 + }, 46 + "original": { 47 + "owner": "nix-community", 48 + "repo": "nixpkgs.lib", 49 + "type": "github" 50 + } 51 + }, 52 + "root": { 53 + "inputs": { 54 + "flake-parts": "flake-parts", 55 + "nixpkgs": "nixpkgs" 56 + } 57 + } 58 + }, 59 + "root": "root", 60 + "version": 7 61 + }
+45
flake.nix
{
  inputs = {
    flake-parts.url = "github:hercules-ci/flake-parts";
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
  };
  outputs =
    inputs@{ flake-parts, ... }:
    flake-parts.lib.mkFlake { inherit inputs; } {
      systems = [
        "x86_64-linux"
        "aarch64-linux"
        "aarch64-darwin"
        "x86_64-darwin"
      ];
      perSystem =
        { pkgs, ... }:
        let
          lazuli = pkgs.buildGoModule rec {
            # pname + version lets the builder derive the "lazuli-<version>" name.
            pname = "lazuli";
            version = "0.1.0";
            # Honour .gitignore and additionally keep data exports out of the source.
            src = pkgs.nix-gitignore.gitignoreSource [ "*.csv" "*.zip" "*.json" ] ./.;
            vendorHash = "sha256-Zr9gGytJARMbf/7120HYkKsfzpeW47MkwdMODD9QTKc=";
            # Stamp the package version into main.Version at link time.
            ldflags = [
              "-X"
              "main.Version=${version}"
            ];
            meta = {
              # Name of the executable to run for this package.
              mainProgram = "lazuli";
            };
          };
        in
        {
          packages = {
            default = lazuli;
            lazuli = lazuli;
          };
          devShells.default = pkgs.mkShell {
            buildInputs = [
              pkgs.go
              pkgs.gofumpt
            ];
          };
        };
    };
}
+29
go.mod
··· 1 + module tangled.org/karitham.dev/lazuli 2 + 3 + go 1.25.5 4 + 5 + require ( 6 + github.com/bluesky-social/indigo v0.0.0-20260114211028-207c9d49d0de 7 + github.com/urfave/cli/v3 v3.6.1 8 + go.etcd.io/bbolt v1.3.10 9 + golang.org/x/text v0.14.0 10 + golang.org/x/time v0.3.0 11 + ) 12 + 13 + require ( 14 + github.com/beorn7/perks v1.0.1 // indirect 15 + github.com/cespare/xxhash/v2 v2.2.0 // indirect 16 + github.com/earthboundkid/versioninfo/v2 v2.24.1 // indirect 17 + github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect 18 + github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect 19 + github.com/mr-tron/base58 v1.2.0 // indirect 20 + github.com/prometheus/client_golang v1.17.0 // indirect 21 + github.com/prometheus/client_model v0.5.0 // indirect 22 + github.com/prometheus/common v0.45.0 // indirect 23 + github.com/prometheus/procfs v0.12.0 // indirect 24 + gitlab.com/yawning/secp256k1-voi v0.0.0-20230925100816-f2616030848b // indirect 25 + gitlab.com/yawning/tuplehash v0.0.0-20230713102510-df83abbf9a02 // indirect 26 + golang.org/x/crypto v0.21.0 // indirect 27 + golang.org/x/sys v0.22.0 // indirect 28 + google.golang.org/protobuf v1.33.0 // indirect 29 + )
+76
go.sum
··· 1 + github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 2 + github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 3 + github.com/bluesky-social/indigo v0.0.0-20260114211028-207c9d49d0de h1:75emVEzhTQWXwAQoBZV4/Bg2NEULZSgRwLFAdTccTrY= 4 + github.com/bluesky-social/indigo v0.0.0-20260114211028-207c9d49d0de/go.mod h1:KIy0FgNQacp4uv2Z7xhNkV3qZiUSGuRky97s7Pa4v+o= 5 + github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= 6 + github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 7 + github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 8 + github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 9 + github.com/earthboundkid/versioninfo/v2 v2.24.1 h1:SJTMHaoUx3GzjjnUO1QzP3ZXK6Ee/nbWyCm58eY3oUg= 10 + github.com/earthboundkid/versioninfo/v2 v2.24.1/go.mod h1:VcWEooDEuyUJnMfbdTh0uFN4cfEIg+kHMuWB2CDCLjw= 11 + github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= 12 + github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 13 + github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= 14 + github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= 15 + github.com/ipfs/go-cid v0.4.1 h1:A/T3qGvxi4kpKWWcPC/PgbvDA2bjVLO7n4UeVwnbs/s= 16 + github.com/ipfs/go-cid v0.4.1/go.mod h1:uQHwDeX4c6CtyrFwdqyhpNcxVewur1M7l7fNU7LKwZk= 17 + github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= 18 + github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= 19 + github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg= 20 + github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k= 21 + github.com/minio/sha256-simd v1.0.1 
h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM= 22 + github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8= 23 + github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= 24 + github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= 25 + github.com/multiformats/go-base32 v0.1.0 h1:pVx9xoSPqEIQG8o+UbAe7DNi51oej1NtK+aGkbLYxPE= 26 + github.com/multiformats/go-base32 v0.1.0/go.mod h1:Kj3tFY6zNr+ABYMqeUNeGvkIC/UYgtWibDcT0rExnbI= 27 + github.com/multiformats/go-base36 v0.2.0 h1:lFsAbNOGeKtuKozrtBsAkSVhv1p9D0/qedU9rQyccr0= 28 + github.com/multiformats/go-base36 v0.2.0/go.mod h1:qvnKE++v+2MWCfePClUEjE78Z7P2a1UV0xHgWc0hkp4= 29 + github.com/multiformats/go-multibase v0.2.0 h1:isdYCVLvksgWlMW9OZRYJEa9pZETFivncJHmHnnd87g= 30 + github.com/multiformats/go-multibase v0.2.0/go.mod h1:bFBZX4lKCA/2lyOFSAoKH5SS6oPyjtnzK/XTFDPkNuk= 31 + github.com/multiformats/go-multihash v0.2.3 h1:7Lyc8XfX/IY2jWb/gI7JP+o7JEq9hOa7BFvVU9RSh+U= 32 + github.com/multiformats/go-multihash v0.2.3/go.mod h1:dXgKXCXjBzdscBLk9JkjINiEsCKRVch90MdaGiKsvSM= 33 + github.com/multiformats/go-varint v0.0.7 h1:sWSGR+f/eu5ABZA2ZpYKBILXTTs9JWpdEM/nEGOHFS8= 34 + github.com/multiformats/go-varint v0.0.7/go.mod h1:r8PUYw/fD/SjBCiKOoDlGF6QawOELpZAu9eioSos/OU= 35 + github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 36 + github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 37 + github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q= 38 + github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY= 39 + github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= 40 + github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= 41 + github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM= 42 + 
github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= 43 + github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= 44 + github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= 45 + github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= 46 + github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 47 + github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= 48 + github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= 49 + github.com/urfave/cli/v3 v3.6.1 h1:j8Qq8NyUawj/7rTYdBGrxcH7A/j7/G8Q5LhWEW4G3Mo= 50 + github.com/urfave/cli/v3 v3.6.1/go.mod h1:ysVLtOEmg2tOy6PknnYVhDoouyC/6N42TMeoMzskhso= 51 + github.com/whyrusleeping/cbor-gen v0.2.1-0.20241030202151-b7a6831be65e h1:28X54ciEwwUxyHn9yrZfl5ojgF4CBNLWX7LR0rvBkf4= 52 + github.com/whyrusleeping/cbor-gen v0.2.1-0.20241030202151-b7a6831be65e/go.mod h1:pM99HXyEbSQHcosHc0iW7YFmwnscr+t9Te4ibko05so= 53 + gitlab.com/yawning/secp256k1-voi v0.0.0-20230925100816-f2616030848b h1:CzigHMRySiX3drau9C6Q5CAbNIApmLdat5jPMqChvDA= 54 + gitlab.com/yawning/secp256k1-voi v0.0.0-20230925100816-f2616030848b/go.mod h1:/y/V339mxv2sZmYYR64O07VuCpdNZqCTwO8ZcouTMI8= 55 + gitlab.com/yawning/tuplehash v0.0.0-20230713102510-df83abbf9a02 h1:qwDnMxjkyLmAFgcfgTnfJrmYKWhHnci3GjDqcZp1M3Q= 56 + gitlab.com/yawning/tuplehash v0.0.0-20230713102510-df83abbf9a02/go.mod h1:JTnUj0mpYiAsuZLmKjTx/ex3AtMowcCgnE7YNyCEP0I= 57 + go.etcd.io/bbolt v1.3.10 h1:+BqfJTcCzTItrop8mq/lbzL8wSGtj94UO/3U31shqG0= 58 + go.etcd.io/bbolt v1.3.10/go.mod h1:bK3UQLPJZly7IlNmV7uVHJDxfe5aK9Ll93e/74Y9oEQ= 59 + golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= 60 + golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= 61 + golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= 62 + 
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 63 + golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= 64 + golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 65 + golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= 66 + golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= 67 + golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= 68 + golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 69 + golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= 70 + golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= 71 + google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= 72 + google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= 73 + gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 74 + gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 75 + lukechampine.com/blake3 v1.2.1 h1:YuqqRuaqsGV71BV/nm9xlI0MKUv4QC54jQnBChWbGnI= 76 + lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k=
+845
main.go
··· 1 + package main 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "fmt" 7 + "io" 8 + "log/slog" 9 + "os" 10 + "slices" 11 + "strings" 12 + "time" 13 + 14 + "tangled.org/karitham.dev/lazuli/cache" 15 + "tangled.org/karitham.dev/lazuli/sources/lastfm" 16 + "tangled.org/karitham.dev/lazuli/sources/spotify" 17 + "tangled.org/karitham.dev/lazuli/sync" 18 + "tangled.org/karitham.dev/lazuli/sync/logutil" 19 + 20 + "github.com/urfave/cli/v3" 21 + ) 22 + 23 + var Version = "dev" 24 + 25 + type App struct { 26 + log *slog.Logger 27 + outputFormat string 28 + storage cache.Storage 29 + } 30 + 31 + func main() { 32 + var code int 33 + if err := run(); err != nil { 34 + code = 1 35 + fmt.Fprintf(os.Stderr, "Error: %v\n", err) 36 + } 37 + os.Exit(code) 38 + } 39 + 40 + func run() error { 41 + sync.UserAgent = "lazuli/" + Version 42 + sync.ClientAgent = "lazuli/" + Version 43 + 44 + storage, err := cache.NewBoltStorage() 45 + if err != nil { 46 + return fmt.Errorf("open cache: %w", err) 47 + } 48 + 49 + app := &App{storage: storage} 50 + 51 + cmd := &cli.Command{ 52 + Name: "lazuli", 53 + Usage: "Import Last.fm and Spotify listening history to Bluesky", 54 + Commands: []*cli.Command{ 55 + app.exportCommand(), 56 + app.importCommand(), 57 + app.syncCommand(), 58 + app.statsCommand(), 59 + app.failedCommand(), 60 + app.retryCommand(), 61 + app.dedupeCommand(), 62 + app.debugCommand(), 63 + app.versionCommand(), 64 + }, 65 + After: func(ctx context.Context, cmd *cli.Command) error { 66 + return storage.Close() 67 + }, 68 + } 69 + 70 + return cmd.Run(context.Background(), os.Args) 71 + } 72 + 73 + func (a *App) exportCommand() *cli.Command { 74 + flags := make([]cli.Flag, 0, len(exportFlags)+len(commonFlags)) 75 + flags = append(flags, exportFlags...) 76 + flags = append(flags, commonFlags...) 
77 + return &cli.Command{ 78 + Name: "export", 79 + Usage: "Parse and merge Last.fm/Spotify exports, output JSON", 80 + UsageText: " lazuli export --lastfm=/path/to/lastfm.csv --spotify=/path/to/spotify.json -o merged.json", 81 + Flags: flags, 82 + Action: a.runExport, 83 + Before: a.initLoggerBefore, 84 + } 85 + } 86 + 87 + func (a *App) importCommand() *cli.Command { 88 + flags := make([]cli.Flag, 0, len(importFlags)+len(commonFlags)) 89 + flags = append(flags, importFlags...) 90 + flags = append(flags, commonFlags...) 91 + return &cli.Command{ 92 + Name: "import", 93 + Usage: "Import listening history to Bluesky", 94 + UsageText: " lazuli import --handle=user.bsky.social --password=app-password --lastfm=plays.csv", 95 + Flags: flags, 96 + Action: a.runImport, 97 + Before: a.initLoggerBefore, 98 + } 99 + } 100 + 101 + func (a *App) syncCommand() *cli.Command { 102 + flags := make([]cli.Flag, 0, len(syncFlags)+len(commonFlags)) 103 + flags = append(flags, syncFlags...) 104 + flags = append(flags, commonFlags...) 105 + return &cli.Command{ 106 + Name: "sync", 107 + Usage: "Fetch existing records, show stats, filter new records", 108 + UsageText: " lazuli sync --handle=user.bsky.social --password=app-password", 109 + Flags: flags, 110 + Action: a.runSync, 111 + Before: a.initLoggerBefore, 112 + } 113 + } 114 + 115 + func (a *App) dedupeCommand() *cli.Command { 116 + flags := make([]cli.Flag, 0, len(dedupeFlags)+len(commonFlags)) 117 + flags = append(flags, dedupeFlags...) 118 + flags = append(flags, commonFlags...) 
119 + return &cli.Command{ 120 + Name: "dedupe", 121 + Usage: "Find and remove duplicate records", 122 + UsageText: " lazuli dedupe --handle=user.bsky.social --password=app-password\n lazuli dedupe --handle=user.bsky.social --password=app-password --dry-run", 123 + Flags: flags, 124 + Action: a.runDedupe, 125 + Before: a.initLoggerBefore, 126 + } 127 + } 128 + 129 + func (a *App) debugCommand() *cli.Command { 130 + flags := make([]cli.Flag, 0, len(commonFlags)) 131 + flags = append(flags, commonFlags...) 132 + return &cli.Command{ 133 + Name: "debug", 134 + Usage: "Fetch and dump raw records for debugging", 135 + UsageText: " lazuli debug --handle=user.bsky.social --password=app-password", 136 + Flags: flags, 137 + Action: a.runDebugFetch, 138 + Before: a.initLoggerBefore, 139 + } 140 + } 141 + 142 + func (a *App) statsCommand() *cli.Command { 143 + flags := make([]cli.Flag, 0, len(commonFlags)) 144 + flags = append(flags, commonFlags...) 145 + return &cli.Command{ 146 + Name: "stats", 147 + Usage: "Display statistics about the local database and rate limits", 148 + UsageText: " lazuli stats", 149 + Flags: flags, 150 + Action: a.runStats, 151 + Before: a.initLoggerBefore, 152 + } 153 + } 154 + 155 + func (a *App) runStats(ctx context.Context, cmd *cli.Command) error { 156 + stats, err := a.storage.Stats() 157 + if err != nil { 158 + return fmt.Errorf("failed to get database stats: %w", err) 159 + } 160 + 161 + limiter := sync.NewRateLimiter(a.storage) 162 + writes, global := limiter.Stats() 163 + 164 + if a.outputFormat == "json" { 165 + out := map[string]any{ 166 + "db": stats, 167 + "rateLimits": map[string]any{ 168 + "writesConsumed": writes, 169 + "globalConsumed": global, 170 + "writesLimit": sync.WriteLimitDay, 171 + "globalLimit": sync.GlobalLimitDay, 172 + "writesRemaining": sync.WriteLimitDay - writes, 173 + "globalRemaining": sync.GlobalLimitDay - global, 174 + }, 175 + } 176 + data, _ := json.MarshalIndent(out, "", " ") 177 + fmt.Println(string(data)) 
178 + return nil 179 + } 180 + 181 + fmt.Println("Database Statistics:") 182 + fmt.Printf(" Total Records: %d\n", stats.TotalRecords) 183 + fmt.Printf(" Marked Published: %d\n", stats.MarkedPublished) 184 + fmt.Printf(" Failed Count: %d\n", stats.FailedCount) 185 + fmt.Printf(" Unpublished Count: %d\n", stats.UnpublishedCount) 186 + 187 + if len(stats.UserStats) > 0 { 188 + fmt.Println("\nUser Statistics:") 189 + for did, s := range stats.UserStats { 190 + m := s.(map[string]int) 191 + fmt.Printf(" %s:\n", did) 192 + fmt.Printf(" Total: %d\n", m["total"]) 193 + fmt.Printf(" Published: %d\n", m["published"]) 194 + fmt.Printf(" Failed: %d\n", m["failed"]) 195 + fmt.Printf(" Pending: %d\n", m["total"]-m["published"]-m["failed"]) 196 + } 197 + } 198 + 199 + fmt.Println("\nRate Limit Consumption (Today):") 200 + fmt.Printf(" Writes: %d / %d (Remaining: %d)\n", writes, sync.WriteLimitDay, sync.WriteLimitDay-writes) 201 + fmt.Printf(" Global: %d / %d (Remaining: %d)\n", global, sync.GlobalLimitDay, sync.GlobalLimitDay-global) 202 + 203 + return nil 204 + } 205 + 206 + func (a *App) failedCommand() *cli.Command { 207 + flags := make([]cli.Flag, 0, len(commonFlags)) 208 + flags = append(flags, commonFlags...) 209 + return &cli.Command{ 210 + Name: "failed", 211 + Usage: "List records that failed to publish", 212 + UsageText: " lazuli failed --handle=user.bsky.social", 213 + Flags: flags, 214 + Action: a.runFailed, 215 + Before: a.initLoggerBefore, 216 + } 217 + } 218 + 219 + func (a *App) retryCommand() *cli.Command { 220 + flags := make([]cli.Flag, 0, len(commonFlags)+1) 221 + flags = append(flags, commonFlags...) 
222 + flags = append(flags, &cli.BoolFlag{ 223 + Name: "dry-run", 224 + Usage: "Preview what will be retried", 225 + Sources: cli.EnvVars(EnvDryRun), 226 + }) 227 + return &cli.Command{ 228 + Name: "retry", 229 + Usage: "Retry failed records one by one", 230 + UsageText: " lazuli retry --handle=user.bsky.social", 231 + Flags: flags, 232 + Action: a.runRetry, 233 + Before: a.initLoggerBefore, 234 + } 235 + } 236 + 237 + func (a *App) runRetry(ctx context.Context, cmd *cli.Command) error { 238 + authClient, err := a.prepareAuth(ctx, cmd) 239 + if err != nil { 240 + return err 241 + } 242 + did := authClient.GetDID() 243 + dryRun := cmd.Bool("dry-run") 244 + 245 + limiter := sync.NewRateLimiter(a.storage) 246 + repoClient := sync.NewRateClient(authClient.GetAPIClient(), did, limiter) 247 + 248 + var failedRecords []struct { 249 + key string 250 + rec sync.PlayRecord 251 + } 252 + 253 + err = a.storage.IterateFailed(did, func(key string, rec []byte, errMsg string) error { 254 + var playRec sync.PlayRecord 255 + if err := json.Unmarshal(rec, &playRec); err != nil { 256 + return nil 257 + } 258 + failedRecords = append(failedRecords, struct { 259 + key string 260 + rec sync.PlayRecord 261 + }{key, playRec}) 262 + return nil 263 + }) 264 + if err != nil { 265 + return fmt.Errorf("failed to load failed records: %w", err) 266 + } 267 + 268 + if len(failedRecords) == 0 { 269 + fmt.Println("No failed records to retry.") 270 + return nil 271 + } 272 + 273 + fmt.Printf("Retrying %d failed records for %s...\n", len(failedRecords), did) 274 + 275 + successCount := 0 276 + errorCount := 0 277 + 278 + for _, fr := range failedRecords { 279 + if dryRun { 280 + fmt.Printf("[DRY-RUN] Would retry: %s - %s\n", getArtistName(fr.rec), fr.rec.TrackName) 281 + successCount++ 282 + continue 283 + } 284 + 285 + // Check rate limit for 1 write 286 + if err := limiter.AllowBulkWrite(ctx, 1); err != nil { 287 + return fmt.Errorf("rate limit wait failed: %w", err) 288 + } 289 + 290 + w, g := 
limiter.Stats() 291 + res := sync.PublishBatch(ctx, repoClient, did, []sync.PlayRecord{fr.rec}, w, g, a.storage) 292 + 293 + if res.ErrorCount == 0 { 294 + fmt.Printf("Successfully retried: %s - %s\n", getArtistName(fr.rec), fr.rec.TrackName) 295 + // Mark as published (updates processedBucket to 1) 296 + if err := a.storage.MarkPublished(did, fr.key); err != nil { 297 + a.log.Error("Failed to mark record as published", logutil.Error(err), slog.String("key", fr.key)) 298 + } 299 + // Remove from failedBucket 300 + if err := a.storage.RemoveFailed(did, fr.key); err != nil { 301 + a.log.Error("Failed to remove record from failed list", logutil.Error(err), slog.String("key", fr.key)) 302 + } 303 + successCount++ 304 + } else { 305 + fmt.Printf("Failed again: %s - %s: %v\n", getArtistName(fr.rec), fr.rec.TrackName, res.LastError) 306 + errorCount++ 307 + limiter.RefundBulkWrite(1) 308 + } 309 + 310 + // Optional: small delay between retries? 311 + // The rate limiter already handles the delay. 312 + } 313 + 314 + fmt.Printf("\nRetry complete: %d succeeded, %d failed.\n", successCount, errorCount) 315 + return nil 316 + } 317 + 318 + func (a *App) runFailed(ctx context.Context, cmd *cli.Command) error { 319 + // We need the DID to look up the user's failed records. 320 + // If it's already in the handle, use it, otherwise we might need to resolve it. 321 + // For simplicity, we'll try to get it from auth or resolve it. 
322 + authClient, err := a.prepareAuth(ctx, cmd) 323 + if err != nil { 324 + return err 325 + } 326 + did := authClient.GetDID() 327 + 328 + type FailedRecord struct { 329 + Key string `json:"key"` 330 + Error string `json:"error"` 331 + Record sync.PlayRecord `json:"record"` 332 + } 333 + 334 + var failed []FailedRecord 335 + err = a.storage.IterateFailed(did, func(key string, rec []byte, errMsg string) error { 336 + var playRec sync.PlayRecord 337 + _ = json.Unmarshal(rec, &playRec) 338 + failed = append(failed, FailedRecord{ 339 + Key: key, 340 + Error: errMsg, 341 + Record: playRec, 342 + }) 343 + return nil 344 + }) 345 + if err != nil { 346 + return fmt.Errorf("failed to iterate failed records: %w", err) 347 + } 348 + 349 + if a.outputFormat == "json" { 350 + data, _ := json.MarshalIndent(failed, "", " ") 351 + fmt.Println(string(data)) 352 + return nil 353 + } 354 + 355 + if len(failed) == 0 { 356 + fmt.Println("No failed records found.") 357 + return nil 358 + } 359 + 360 + fmt.Printf("Failed Records for %s (%d):\n", did, len(failed)) 361 + for _, f := range failed { 362 + fmt.Printf(" [%s] %s - %s: %s\n", 363 + f.Record.PlayedTime.Format(time.RFC3339), 364 + getArtistName(f.Record), 365 + f.Record.TrackName, 366 + f.Error) 367 + } 368 + 369 + return nil 370 + } 371 + 372 + func (a *App) versionCommand() *cli.Command { 373 + return &cli.Command{ 374 + Name: "version", 375 + Usage: "Print the version number", 376 + Action: func(ctx context.Context, cmd *cli.Command) error { 377 + fmt.Println(Version) 378 + return nil 379 + }, 380 + } 381 + } 382 + 383 + func (a *App) runDebugFetch(ctx context.Context, cmd *cli.Command) error { 384 + authClient, err := a.prepareAuth(ctx, cmd) 385 + if err != nil { 386 + return fmt.Errorf("authentication failed: %w\nHint: Make sure your credentials are correct and you have network access.", err) 387 + } 388 + 389 + repoClient := sync.NewRateClient(authClient.GetAPIClient(), authClient.GetDID(), nil) 390 + 391 + records, _, err 
:= repoClient.ListRecords(ctx, sync.RecordType, 10, "") 392 + if err != nil { 393 + return fmt.Errorf("failed to fetch records from Bluesky: %w\nHint: Check your network connection and try again.", err) 394 + } 395 + 396 + enc := json.NewEncoder(os.Stdout) 397 + enc.SetIndent("", " ") 398 + for _, r := range records { 399 + if err := enc.Encode(r); err != nil { 400 + return fmt.Errorf("failed to encode record: %w", err) 401 + } 402 + } 403 + 404 + return nil 405 + } 406 + 407 + func (a *App) initLoggerBefore(ctx context.Context, cmd *cli.Command) (context.Context, error) { 408 + var level slog.Level 409 + 410 + switch verbosity := verboseCount - quietCount; { 411 + case verbosity >= 2: 412 + level = slog.LevelDebug 413 + case verbosity == 1: 414 + level = slog.LevelInfo 415 + case verbosity <= -1: 416 + level = slog.LevelError 417 + default: 418 + level = slog.LevelInfo 419 + } 420 + 421 + a.outputFormat = cmd.String("output-format") 422 + 423 + var handler slog.Handler = slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: level}) 424 + if a.outputFormat == "json" { 425 + handler = slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: level}) 426 + } 427 + 428 + a.log = slog.New(handler) 429 + slog.SetDefault(a.log) 430 + return ctx, nil 431 + } 432 + 433 + func (a *App) getCredentials(cmd *cli.Command) (string, string, error) { 434 + handle := cmd.String("handle") 435 + password := cmd.String("password") 436 + 437 + if handle == "" { 438 + return "", "", fmt.Errorf("Bluesky handle is required (set --handle or set the LAZULI_HANDLE environment variable)") 439 + } 440 + if password == "" { 441 + return "", "", fmt.Errorf("app password is required (set --password or set the LAZULI_PASSWORD environment variable)") 442 + } 443 + 444 + return handle, password, nil 445 + } 446 + 447 + func (a *App) prepareAuth(ctx context.Context, cmd *cli.Command) (*sync.Client, error) { 448 + handle, password, err := a.getCredentials(cmd) 449 + if err != nil { 450 + return 
nil, err 451 + } 452 + 453 + authClient, err := sync.NewClient(ctx, handle, password) 454 + if err != nil { 455 + return nil, fmt.Errorf("create auth client: %w", err) 456 + } 457 + 458 + return authClient, nil 459 + } 460 + 461 + func (a *App) runExport(ctx context.Context, cmd *cli.Command) error { 462 + a.log.Info("Starting export operation") 463 + 464 + lastfmPath := cmd.String("lastfm") 465 + spotifyPath := cmd.String("spotify") 466 + outputPath := cmd.String("output") 467 + reverse := cmd.Bool("reverse") 468 + tolerance := cmd.Duration("tolerance") 469 + 470 + lastfmRecords, err := sync.ParseInput(ctx, lastfmPath, lastfm.Parser{}) 471 + if err != nil { 472 + return fmt.Errorf("parse lastfm: %w", err) 473 + } 474 + a.log.Info("Loaded Last.fm records", slog.Int("count", len(lastfmRecords))) 475 + 476 + spotifyRecords, err := sync.ParseInput(ctx, spotifyPath, spotify.Parser{}) 477 + if err != nil { 478 + return fmt.Errorf("parse spotify: %w", err) 479 + } 480 + a.log.Info("Loaded Spotify records", slog.Int("count", len(spotifyRecords))) 481 + 482 + mergedRecords, stats := sync.MergeRecords(lastfmRecords, spotifyRecords, tolerance) 483 + a.log.Info("Merged records", slog.Int("merged_total", stats.MergedTotal), slog.Int("duplicates_removed", stats.DuplicatesRemoved)) 484 + 485 + if reverse { 486 + slices.Reverse(mergedRecords) 487 + } 488 + 489 + return a.outputRecords(mergedRecords, outputPath) 490 + } 491 + 492 + func (a *App) runImport(ctx context.Context, cmd *cli.Command) error { 493 + handle, password, err := a.getCredentials(cmd) 494 + if err != nil { 495 + return err 496 + } 497 + 498 + a.log.Info("Starting import operation", logutil.DID(handle)) 499 + 500 + lastfmPath := cmd.String("lastfm") 501 + spotifyPath := cmd.String("spotify") 502 + modeStr := cmd.String("mode") 503 + dryRun := cmd.Bool("dry-run") 504 + reverse := cmd.Bool("reverse") 505 + fresh := cmd.Bool("fresh") 506 + clearCache := cmd.Bool("clear-cache") 507 + batchSize := 
int(cmd.Int("batch-size")) 508 + batchDelay := int(cmd.Int("batch-delay")) 509 + tolerance := cmd.Duration("tolerance") 510 + 511 + if clearCache { 512 + if err := a.storage.ClearAll(); err != nil { 513 + a.log.Error("Failed to clear cache", logutil.Error(err)) 514 + } else { 515 + a.log.Info("Cache cleared") 516 + } 517 + } 518 + 519 + var mode sync.ImportMode 520 + switch modeStr { 521 + case "lastfm": 522 + mode = sync.ImportModeLastFM 523 + case "spotify": 524 + mode = sync.ImportModeSpotify 525 + case "combined": 526 + mode = sync.ImportModeCombined 527 + default: 528 + return fmt.Errorf("invalid mode: %s (must be lastfm, spotify, or combined)", modeStr) 529 + } 530 + 531 + records, totalCount, err := sync.LoadRecordsForImport(ctx, sync.ImportOptions{ 532 + LastFMPath: lastfmPath, 533 + SpotifyPath: spotifyPath, 534 + Mode: mode, 535 + Tolerance: tolerance, 536 + LastFMParser: lastfm.Parser{}, 537 + SpotifyParser: spotify.Parser{}, 538 + }) 539 + if err != nil { 540 + return fmt.Errorf("load records: %w", err) 541 + } 542 + a.log.Info("Loaded records for import", slog.Int("total", totalCount), slog.Int("filtered", len(records))) 543 + 544 + if len(records) == 0 { 545 + a.log.Info("No new records to import") 546 + return nil 547 + } 548 + 549 + authClient, err := sync.NewClient(ctx, handle, password) 550 + if err != nil { 551 + return fmt.Errorf("create auth client: %w", err) 552 + } 553 + a.log.Info("Authenticated", logutil.DID(authClient.GetDID()), slog.String("pds", authClient.GetPDS())) 554 + 555 + limiter := sync.NewRateLimiter(a.storage) 556 + repoClient := sync.NewRateClient(authClient.GetAPIClient(), authClient.GetDID(), limiter) 557 + 558 + existingRecords, err := sync.FetchExisting(ctx, repoClient, authClient.GetDID(), a.storage, fresh) 559 + if err != nil { 560 + return fmt.Errorf("fetch existing records: %w", err) 561 + } 562 + a.log.Info("Fetched existing records", slog.Int("count", len(existingRecords))) 563 + 564 + published, _ := 
a.storage.GetPublished(authClient.GetDID()) 565 + newRecords := sync.FilterNew(records, existingRecords, published) 566 + skippedCount := len(records) - len(newRecords) 567 + a.log.Info("Filtered to new records", 568 + slog.Int("count", len(newRecords)), 569 + slog.Int("skipped", skippedCount)) 570 + 571 + if len(newRecords) == 0 { 572 + a.log.Info("All records already exist, nothing to import") 573 + return nil 574 + } 575 + 576 + if reverse { 577 + slices.Reverse(newRecords) 578 + } 579 + 580 + if len(newRecords) > 0 { 581 + newEntries := make(map[string][]byte) 582 + for _, rec := range newRecords { 583 + key := sync.CreateRecordKey(rec) 584 + value, _ := json.Marshal(rec) 585 + newEntries[key] = value 586 + } 587 + if err := a.storage.SaveRecords(authClient.GetDID(), newEntries); err != nil { 588 + return fmt.Errorf("save new records to storage: %w", err) 589 + } 590 + } 591 + 592 + cfg := sync.DefaultConfig 593 + cfg.BatchSize = batchSize 594 + cfg.BatchDelay = time.Duration(batchDelay) * time.Millisecond 595 + 596 + progressLog := a.createProgressLogger() 597 + 598 + publishOpts := sync.PublishOptions{ 599 + BatchSize: cfg.BatchSize, 600 + BatchDelay: cfg.BatchDelay, 601 + DryRun: dryRun, 602 + ATProtoClient: repoClient, 603 + ProgressLog: progressLog, 604 + Storage: a.storage, 605 + } 606 + 607 + result := sync.Publish(ctx, authClient, publishOpts, limiter) 608 + 609 + a.log.Info("Import completed", 610 + slog.Int("success_count", result.SuccessCount), 611 + slog.Int("error_count", result.ErrorCount), 612 + slog.Bool("cancelled", result.Cancelled), 613 + slog.Duration("duration", result.Duration), 614 + slog.Float64("records_per_minute", result.RecordsPerMinute)) 615 + 616 + if a.outputFormat == "json" { 617 + summary := map[string]any{ 618 + "successCount": result.SuccessCount, 619 + "errorCount": result.ErrorCount, 620 + "cancelled": result.Cancelled, 621 + "durationSeconds": result.Duration.Seconds(), 622 + "recordsPerMinute": result.RecordsPerMinute, 623 
+ "totalRecords": result.TotalRecords, 624 + } 625 + if data, err := json.MarshalIndent(summary, "", " "); err == nil { 626 + fmt.Fprintln(os.Stderr, string(data)) 627 + } 628 + } 629 + 630 + if result.Errored() { 631 + return fmt.Errorf("import completed with %d errors", result.ErrorCount) 632 + } 633 + 634 + return nil 635 + } 636 + 637 + func (a *App) createProgressLogger() func(sync.ProgressReport) { 638 + return func(pr sync.ProgressReport) { 639 + if a.outputFormat == "json" { 640 + if data, err := json.MarshalIndent(pr, "", " "); err == nil { 641 + fmt.Fprintln(os.Stderr, string(data)) 642 + } 643 + } else { 644 + a.log.Info("sync progress", 645 + slog.Int("completed", pr.Completed), 646 + slog.Int("total", pr.Total), 647 + slog.Float64("percent", pr.Percent), 648 + slog.String("elapsed", pr.Elapsed), 649 + slog.String("eta", pr.ETA), 650 + slog.String("rate", pr.Rate), 651 + slog.Int("errors", pr.Errors)) 652 + } 653 + } 654 + } 655 + 656 + func (a *App) runSync(ctx context.Context, cmd *cli.Command) error { 657 + authClient, err := a.prepareAuth(ctx, cmd) 658 + if err != nil { 659 + return err 660 + } 661 + 662 + fresh := cmd.Bool("fresh") 663 + a.log.Info("Starting sync operation", logutil.DID(authClient.GetDID()), slog.Bool("fresh", fresh)) 664 + 665 + limiter := sync.NewRateLimiter(a.storage) 666 + repoClient := sync.NewRateClient(authClient.GetAPIClient(), authClient.GetDID(), limiter) 667 + 668 + if fresh { 669 + if err := a.storage.Clear(authClient.GetDID()); err != nil { 670 + a.log.Error("Failed to clear cache", logutil.Error(err)) 671 + } else { 672 + a.log.Info("Cache cleared") 673 + } 674 + } 675 + 676 + existingRecords, err := sync.FetchExisting(ctx, repoClient, authClient.GetDID(), a.storage, fresh) 677 + if err != nil { 678 + return fmt.Errorf("fetch existing records: %w", err) 679 + } 680 + 681 + a.log.Info("Sync stats", slog.Int("total_records", len(existingRecords))) 682 + 683 + return nil 684 + } 685 + 686 + func (a *App) runDedupe(ctx 
context.Context, cmd *cli.Command) error { 687 + authClient, err := a.prepareAuth(ctx, cmd) 688 + if err != nil { 689 + return fmt.Errorf("authentication failed: %w\nHint: Make sure your credentials are correct.", err) 690 + } 691 + 692 + dryRun := cmd.Bool("dry-run") 693 + fresh := cmd.Bool("fresh") 694 + yes := cmd.Bool("yes") 695 + a.log.Info("Starting dedupe operation", 696 + logutil.DID(authClient.GetDID()), 697 + slog.Bool("dry_run", dryRun), 698 + slog.Bool("fresh", fresh)) 699 + 700 + limiter := sync.NewRateLimiter(a.storage) 701 + repoClient := sync.NewRateClient(authClient.GetAPIClient(), authClient.GetDID(), limiter) 702 + 703 + if fresh { 704 + if err := a.storage.Clear(authClient.GetDID()); err != nil { 705 + a.log.Error("Failed to clear cache", logutil.Error(err)) 706 + } else { 707 + a.log.Info("Cache cleared") 708 + } 709 + } 710 + 711 + existingRecords, err := sync.FetchExisting(ctx, repoClient, authClient.GetDID(), a.storage, fresh) 712 + if err != nil { 713 + return fmt.Errorf("failed to fetch existing records: %w\nHint: Check your network connection and try again.", err) 714 + } 715 + 716 + duplicates := sync.FindDuplicates(existingRecords) 717 + totalDuplicates := 0 718 + for _, group := range duplicates { 719 + totalDuplicates += len(group) - 1 720 + } 721 + 722 + a.log.Info("Dedupe analysis", 723 + slog.Int("total_records", len(existingRecords)), 724 + slog.Int("duplicate_groups", len(duplicates)), 725 + slog.Int("total_duplicates", totalDuplicates)) 726 + 727 + if totalDuplicates == 0 { 728 + a.log.Info("No duplicates found") 729 + return nil 730 + } 731 + 732 + if dryRun { 733 + a.log.InfoContext(ctx, "Dry run - would remove the following duplicates") 734 + for _, group := range duplicates { 735 + keep := group[0] 736 + for _, rec := range group[1:] { 737 + a.log.InfoContext(ctx, "Would remove", 738 + slog.String("uri", rec.URI), 739 + slog.String("track", keep.Value.TrackName), 740 + slog.String("artist", getArtistName(keep.Value)), 741 + 
slog.String("time", keep.Value.PlayedTime.Format(time.RFC3339))) 742 + } 743 + } 744 + return nil 745 + } 746 + 747 + if !yes { 748 + fmt.Fprintf(os.Stderr, "\nThis will permanently delete %d duplicate record(s). Continue? [y/N]: ", totalDuplicates) 749 + var response string 750 + fmt.Scanln(&response) 751 + if response != "y" && response != "Y" { 752 + a.log.Info("Dedupe cancelled by user") 753 + return nil 754 + } 755 + } 756 + 757 + for _, group := range duplicates { 758 + for i := 1; i < len(group); i++ { 759 + rec := group[i] 760 + uri := rec.URI 761 + parts := strings.Split(uri, "/") 762 + rkey := parts[len(parts)-1] 763 + err := repoClient.DeleteRecord(ctx, sync.RecordType, rkey) 764 + if err != nil { 765 + a.log.Error("Failed to delete record", logutil.Error(err), slog.String("uri", uri)) 766 + } else { 767 + a.log.Info("Deleted duplicate", slog.String("uri", uri), slog.String("track", rec.Value.TrackName)) 768 + } 769 + } 770 + } 771 + 772 + if err := a.storage.Clear(authClient.GetDID()); err != nil { 773 + a.log.Error("Failed to clear cache", "err", err) 774 + } 775 + 776 + return nil 777 + } 778 + 779 + func (a *App) loadRecordsForImport(ctx context.Context, lastfmPath, spotifyPath string, mode sync.ImportMode, tolerance time.Duration) ([]sync.PlayRecord, int, error) { 780 + var lastfmRecords, spotifyRecords []sync.PlayRecord 781 + var err error 782 + 783 + if mode == sync.ImportModeLastFM || mode == sync.ImportModeCombined { 784 + lastfmRecords, err = sync.ParseInput(ctx, lastfmPath, lastfm.Parser{}) 785 + if err != nil { 786 + return nil, 0, fmt.Errorf("parse lastfm: %w", err) 787 + } 788 + } 789 + 790 + if mode == sync.ImportModeSpotify || mode == sync.ImportModeCombined { 791 + spotifyRecords, err = sync.ParseInput(ctx, spotifyPath, spotify.Parser{}) 792 + if err != nil { 793 + return nil, 0, fmt.Errorf("parse spotify: %w", err) 794 + } 795 + } 796 + 797 + totalInput := len(lastfmRecords) + len(spotifyRecords) 798 + 799 + var ( 800 + mergedRecords 
[]sync.PlayRecord 801 + stats sync.MergeStats 802 + ) 803 + 804 + switch mode { 805 + case sync.ImportModeCombined: 806 + mergedRecords, stats = sync.MergeRecords(lastfmRecords, spotifyRecords, tolerance) 807 + a.log.Debug("Merged records", 808 + slog.Int("merged_total", stats.MergedTotal), 809 + slog.Int("duplicates_removed", stats.DuplicatesRemoved)) 810 + case sync.ImportModeLastFM: 811 + mergedRecords = lastfmRecords 812 + default: 813 + mergedRecords = spotifyRecords 814 + } 815 + 816 + return mergedRecords, totalInput, nil 817 + } 818 + 819 + func (a *App) outputRecords(records []sync.PlayRecord, outputPath string) error { 820 + var output io.Writer = os.Stdout 821 + if outputPath != "" { 822 + file, err := os.Create(outputPath) 823 + if err != nil { 824 + return fmt.Errorf("create output file: %w", err) 825 + } 826 + defer file.Close() 827 + output = file 828 + } 829 + 830 + enc := json.NewEncoder(output) 831 + for _, r := range records { 832 + if err := enc.Encode(r); err != nil { 833 + return fmt.Errorf("encode record: %w", err) 834 + } 835 + } 836 + 837 + return nil 838 + } 839 + 840 + func getArtistName(record sync.PlayRecord) string { 841 + if len(record.Artists) > 0 { 842 + return record.Artists[0].ArtistName 843 + } 844 + return "Unknown Artist" 845 + }
+141
sources/lastfm/lastfm.go
··· 1 + package lastfm 2 + 3 + import ( 4 + "context" 5 + "encoding/csv" 6 + "errors" 7 + "io" 8 + "io/fs" 9 + "net/url" 10 + "strconv" 11 + "strings" 12 + "time" 13 + 14 + "tangled.org/karitham.dev/lazuli/sync" 15 + ) 16 + 17 + type Parser struct{} 18 + 19 + func (Parser) ParseFile(ctx context.Context, r io.Reader) ([]sync.PlayRecord, error) { 20 + reader := csv.NewReader(r) 21 + reader.TrimLeadingSpace = true 22 + reader.FieldsPerRecord = -1 23 + 24 + _, err := reader.Read() 25 + if err != nil { 26 + return nil, err 27 + } 28 + 29 + records := make([]record, 0, 256) 30 + for { 31 + select { 32 + case <-ctx.Done(): 33 + return nil, ctx.Err() 34 + default: 35 + } 36 + 37 + row, err := reader.Read() 38 + if err != nil { 39 + if errors.Is(err, io.EOF) { 40 + break 41 + } 42 + return nil, err 43 + } 44 + if len(row) < 7 { 45 + continue 46 + } 47 + 48 + trackMbid := "" 49 + if len(row) > 7 { 50 + trackMbid = strings.TrimSpace(row[7]) 51 + } 52 + 53 + records = append(records, record{ 54 + Uts: strings.TrimSpace(row[0]), 55 + UtcTime: strings.TrimSpace(row[1]), 56 + Artist: strings.TrimSpace(row[2]), 57 + ArtistMbid: strings.TrimSpace(row[3]), 58 + Album: strings.TrimSpace(row[4]), 59 + AlbumMbid: strings.TrimSpace(row[5]), 60 + Track: strings.TrimSpace(row[6]), 61 + TrackMbid: trackMbid, 62 + }) 63 + } 64 + 65 + return toSync(records), nil 66 + } 67 + 68 + func (Parser) ParseFS(ctx context.Context, fsys fs.FS) ([]sync.PlayRecord, error) { 69 + allRecords := make([]sync.PlayRecord, 0, 256) 70 + 71 + var walkErr error 72 + fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { 73 + select { 74 + case <-ctx.Done(): 75 + return ctx.Err() 76 + default: 77 + } 78 + 79 + if err != nil { 80 + walkErr = err 81 + return err 82 + } 83 + if d.IsDir() || !strings.HasSuffix(path, ".csv") { 84 + return nil 85 + } 86 + 87 + file, err := fsys.Open(path) 88 + if err != nil { 89 + walkErr = err 90 + return err 91 + } 92 + defer file.Close() 93 + 94 + records, err := 
Parser{}.ParseFile(ctx, file) 95 + if err != nil { 96 + walkErr = err 97 + return err 98 + } 99 + 100 + allRecords = append(allRecords, records...) 101 + return nil 102 + }) 103 + 104 + return allRecords, walkErr 105 + } 106 + 107 + type record struct { 108 + Uts string 109 + UtcTime string 110 + Artist string 111 + ArtistMbid string 112 + Album string 113 + AlbumMbid string 114 + Track string 115 + TrackMbid string 116 + } 117 + 118 + func toSync(records []record) []sync.PlayRecord { 119 + result := make([]sync.PlayRecord, 0, len(records)) 120 + 121 + for _, r := range records { 122 + utsSec, err := strconv.ParseInt(r.Uts, 10, 64) 123 + if err != nil { 124 + continue 125 + } 126 + 127 + result = append(result, sync.PlayRecord{ 128 + Type: sync.RecordType, 129 + TrackName: r.Track, 130 + Artists: []sync.PlayRecordArtist{{ArtistName: r.Artist, ArtistMbId: r.ArtistMbid}}, 131 + PlayedTime: sync.Timestamp{Time: time.Unix(utsSec, 0).UTC()}, 132 + SubmissionClientAgent: sync.ClientAgent, 133 + MusicServiceBaseDomain: sync.MusicServiceLastFM, 134 + ReleaseName: r.Album, 135 + ReleaseMbId: r.AlbumMbid, 136 + RecordingMbId: r.TrackMbid, 137 + OriginUrl: "https://www.last.fm/music/" + url.QueryEscape(r.Artist) + "/_/" + url.QueryEscape(r.Track), 138 + }) 139 + } 140 + return result 141 + }
+136
sources/lastfm/lastfm_test.go
··· 1 + package lastfm 2 + 3 + import ( 4 + "strings" 5 + "testing" 6 + 7 + "tangled.org/karitham.dev/lazuli/sync" 8 + ) 9 + 10 + func TestParseFile(t *testing.T) { 11 + tests := []struct { 12 + name string 13 + content string 14 + wantLen int 15 + checkFn func([]sync.PlayRecord) bool 16 + }{ 17 + { 18 + name: "parses multiple records", 19 + content: `uts,utc_time,artist,artist_mbid,album,album_mbid,track,track_mbid 20 + "1705315800",2024-01-15 10:30:00,The Smiths,,The Queen Is Dead,,There Is a Light That Never Goes Out, 21 + 1705319400,2024-01-15 11:30:00,Queen,abc123,A Night at the Opera,def456,Bohemian Rhapsody,ghi789`, 22 + wantLen: 2, 23 + checkFn: func(records []sync.PlayRecord) bool { 24 + return records[0].Artists[0].ArtistName == "The Smiths" && 25 + records[1].TrackName == "Bohemian Rhapsody" && 26 + records[1].Artists[0].ArtistMbId == "abc123" 27 + }, 28 + }, 29 + { 30 + name: "handles empty file", 31 + content: `uts,utc_time,artist,artist_mbid,album,album_mbid,track,track_mbid`, 32 + wantLen: 0, 33 + checkFn: nil, 34 + }, 35 + { 36 + name: "skips short rows", 37 + content: `uts,utc_time,artist,artist_mbid,album,album_mbid,track,track_mbid 38 + 1705315800,2024-01-15 10:30:00,Artist,,,Track,`, 39 + wantLen: 1, 40 + checkFn: nil, 41 + }, 42 + } 43 + 44 + for _, tt := range tests { 45 + t.Run(tt.name, func(t *testing.T) { 46 + records, err := Parser{}.ParseFile(t.Context(), strings.NewReader(tt.content)) 47 + if err != nil { 48 + t.Fatalf("ParseFile() error = %v", err) 49 + } 50 + 51 + if len(records) != tt.wantLen { 52 + t.Errorf("len(records) = %d, want %d", len(records), tt.wantLen) 53 + } 54 + 55 + if tt.checkFn != nil && !tt.checkFn(records) { 56 + t.Error("checkFn failed") 57 + } 58 + }) 59 + } 60 + } 61 + 62 + func TestToSync(t *testing.T) { 63 + tests := []struct { 64 + name string 65 + records []record 66 + checkFn func(sync.PlayRecord) bool 67 + }{ 68 + { 69 + name: "converts record with all fields", 70 + records: []record{{ 71 + Uts: 
"1705315800", 72 + UtcTime: "2024-01-15 10:30:00", 73 + Artist: "The Smiths", 74 + ArtistMbid: "mbid-123", 75 + Album: "The Queen Is Dead", 76 + AlbumMbid: "mbid-456", 77 + Track: "There Is a Light That Never Goes Out", 78 + TrackMbid: "mbid-789", 79 + }}, 80 + checkFn: func(rec sync.PlayRecord) bool { 81 + return rec.Type == sync.RecordType && 82 + rec.TrackName == "There Is a Light That Never Goes Out" && 83 + rec.MusicServiceBaseDomain == sync.MusicServiceLastFM && 84 + len(rec.Artists) == 1 && 85 + rec.Artists[0].ArtistName == "The Smiths" && 86 + rec.Artists[0].ArtistMbId == "mbid-123" && 87 + rec.ReleaseName == "The Queen Is Dead" && 88 + rec.ReleaseMbId == "mbid-456" && 89 + rec.RecordingMbId == "mbid-789" 90 + }, 91 + }, 92 + { 93 + name: "handles missing optional fields", 94 + records: []record{{ 95 + Uts: "1705315800", 96 + Artist: "Unknown Artist", 97 + Track: "Unknown Track", 98 + }}, 99 + checkFn: func(rec sync.PlayRecord) bool { 100 + return rec.Artists[0].ArtistMbId == "" && 101 + rec.ReleaseMbId == "" && 102 + rec.RecordingMbId == "" && 103 + rec.ReleaseName == "" 104 + }, 105 + }, 106 + { 107 + name: "converts multiple records", 108 + records: []record{ 109 + {Uts: "1705315800", Artist: "Artist1", Track: "Track1"}, 110 + {Uts: "1705319400", Artist: "Artist2", Track: "Track2"}, 111 + {Uts: "1705323000", Artist: "Artist3", Track: "Track3"}, 112 + }, 113 + checkFn: func(rec sync.PlayRecord) bool { 114 + return rec.Artists[0].ArtistName == "Artist1" || 115 + rec.Artists[0].ArtistName == "Artist2" || 116 + rec.Artists[0].ArtistName == "Artist3" 117 + }, 118 + }, 119 + } 120 + 121 + for _, tt := range tests { 122 + t.Run(tt.name, func(t *testing.T) { 123 + syncRecords := toSync(tt.records) 124 + 125 + if len(syncRecords) != len(tt.records) { 126 + t.Errorf("len(syncRecords) = %d, want %d", len(syncRecords), len(tt.records)) 127 + } 128 + 129 + for i, rec := range syncRecords { 130 + if tt.checkFn != nil && !tt.checkFn(rec) { 131 + t.Errorf("checkFn 
failed at index %d", i) 132 + } 133 + } 134 + }) 135 + } 136 + }
+142
sources/spotify/spotify.go
··· 1 + package spotify 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "io" 7 + "io/fs" 8 + "strings" 9 + "time" 10 + 11 + "tangled.org/karitham.dev/lazuli/sync" 12 + ) 13 + 14 + type Parser struct{} 15 + 16 + func (Parser) ParseFile(ctx context.Context, r io.Reader) ([]sync.PlayRecord, error) { 17 + var records []record 18 + if err := json.NewDecoder(r).Decode(&records); err != nil { 19 + return nil, err 20 + } 21 + 22 + result := make([]sync.PlayRecord, 0, len(records)) 23 + for _, r := range records { 24 + select { 25 + case <-ctx.Done(): 26 + return result, ctx.Err() 27 + default: 28 + } 29 + 30 + if r.MasterMetadataTrackName == nil || *r.MasterMetadataTrackName == "" || 31 + r.MasterMetadataAlbumArtistName == nil || *r.MasterMetadataAlbumArtistName == "" { 32 + continue 33 + } 34 + if r.EpisodeName != nil && *r.EpisodeName != "" { 35 + continue 36 + } 37 + if r.MsPlayed == 0 || time.Duration(r.MsPlayed)*time.Millisecond < sync.MinListenDuration { 38 + continue 39 + } 40 + 41 + trackName := *r.MasterMetadataTrackName 42 + artistName := *r.MasterMetadataAlbumArtistName 43 + 44 + trackId := "" 45 + if r.SpotifyTrackUri != nil { 46 + parts := strings.Split(*r.SpotifyTrackUri, ":") 47 + if len(parts) == 3 { 48 + trackId = parts[2] 49 + } 50 + } 51 + 52 + originUrl := "" 53 + if trackId != "" { 54 + originUrl = "https://open.spotify.com/track/" + trackId 55 + } 56 + 57 + releaseName := "" 58 + if r.MasterMetadataAlbumAlbumName != nil { 59 + releaseName = *r.MasterMetadataAlbumAlbumName 60 + } 61 + 62 + result = append(result, sync.PlayRecord{ 63 + Type: sync.RecordType, 64 + TrackName: trackName, 65 + Artists: []sync.PlayRecordArtist{{ArtistName: artistName}}, 66 + PlayedTime: r.Ts, 67 + SubmissionClientAgent: sync.ClientAgent, 68 + MusicServiceBaseDomain: sync.MusicServiceSpotify, 69 + ReleaseName: releaseName, 70 + OriginUrl: originUrl, 71 + MsPlayed: r.MsPlayed, 72 + }) 73 + } 74 + 75 + return result, nil 76 + } 77 + 78 + func (Parser) ParseFS(ctx 
context.Context, fsys fs.FS) ([]sync.PlayRecord, error) { 79 + allRecords := make([]sync.PlayRecord, 0, 256) 80 + 81 + err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { 82 + select { 83 + case <-ctx.Done(): 84 + return ctx.Err() 85 + default: 86 + } 87 + 88 + if err != nil { 89 + return err 90 + } 91 + if d.IsDir() { 92 + return nil 93 + } 94 + 95 + name := d.Name() 96 + if !strings.HasPrefix(name, "Streaming_History_Audio_") || !strings.HasSuffix(name, ".json") { 97 + return nil 98 + } 99 + 100 + file, err := fsys.Open(path) 101 + if err != nil { 102 + return err 103 + } 104 + 105 + records, err := Parser{}.ParseFile(ctx, file) 106 + if err != nil { 107 + file.Close() 108 + return err 109 + } 110 + if err := file.Close(); err != nil { 111 + return err 112 + } 113 + 114 + allRecords = append(allRecords, records...) 115 + return nil 116 + }) 117 + if err != nil { 118 + return nil, err 119 + } 120 + 121 + return allRecords, nil 122 + } 123 + 124 + type record struct { 125 + Ts sync.Timestamp `json:"ts"` 126 + Platform string `json:"platform"` 127 + MsPlayed int `json:"ms_played"` 128 + ConnCountry string `json:"conn_country"` 129 + MasterMetadataTrackName *string `json:"master_metadata_track_name"` 130 + MasterMetadataAlbumArtistName *string `json:"master_metadata_album_artist_name"` 131 + MasterMetadataAlbumAlbumName *string `json:"master_metadata_album_album_name"` 132 + SpotifyTrackUri *string `json:"spotify_track_uri"` 133 + EpisodeName *string `json:"episode_name"` 134 + EpisodeShowName *string `json:"episode_show_name"` 135 + ReasonStart string `json:"reason_start"` 136 + ReasonEnd string `json:"reason_end"` 137 + Shuffle bool `json:"shuffle"` 138 + Skipped bool `json:"skipped"` 139 + Offline bool `json:"offline"` 140 + OfflineTimestamp *int `json:"offline_timestamp"` 141 + IncognitoMode bool `json:"incognito_mode"` 142 + }
+62
sources/spotify/spotify_test.go
··· 1 + package spotify 2 + 3 + import ( 4 + "strings" 5 + "testing" 6 + 7 + "tangled.org/karitham.dev/lazuli/sync" 8 + ) 9 + 10 + func TestParseFile(t *testing.T) { 11 + tests := []struct { 12 + name string 13 + content string 14 + wantLen int 15 + checkFn func([]sync.PlayRecord) bool 16 + }{ 17 + { 18 + name: "parses single record", 19 + content: `[{ 20 + "ts": "2024-01-15T10:30:00.000Z", 21 + "platform": "linux", 22 + "ms_played": 234000, 23 + "conn_country": "US", 24 + "master_metadata_track_name": "There Is a Light That Never Goes Out", 25 + "master_metadata_album_artist_name": "The Smiths", 26 + "master_metadata_album_album_name": "The Queen Is Dead", 27 + "spotify_track_uri": "spotify:track:4lCyGXS4wC07dDX9f4F1Q5", 28 + "episode_name": null, 29 + "episode_show_name": null, 30 + "reason_start": "trackdone", 31 + "reason_end": "trackdone", 32 + "shuffle": false, 33 + "skipped": false, 34 + "offline": false, 35 + "offline_timestamp": null, 36 + "incognito_mode": false 37 + }]`, 38 + wantLen: 1, 39 + checkFn: func(records []sync.PlayRecord) bool { 40 + return records[0].Artists[0].ArtistName == "The Smiths" && 41 + records[0].TrackName == "There Is a Light That Never Goes Out" 42 + }, 43 + }, 44 + } 45 + 46 + for _, tt := range tests { 47 + t.Run(tt.name, func(t *testing.T) { 48 + records, err := Parser{}.ParseFile(t.Context(), strings.NewReader(tt.content)) 49 + if err != nil { 50 + t.Fatalf("ParseFile() error = %v", err) 51 + } 52 + 53 + if len(records) != tt.wantLen { 54 + t.Errorf("len(records) = %d, want %d", len(records), tt.wantLen) 55 + } 56 + 57 + if tt.checkFn != nil && !tt.checkFn(records) { 58 + t.Error("checkFn failed") 59 + } 60 + }) 61 + } 62 + }
+359
sync/adapter.go
··· 1 + package sync 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "fmt" 7 + "log/slog" 8 + 9 + "github.com/bluesky-social/indigo/atproto/atclient" 10 + "github.com/bluesky-social/indigo/atproto/syntax" 11 + 12 + "tangled.org/karitham.dev/lazuli/cache" 13 + "tangled.org/karitham.dev/lazuli/sync/logutil" 14 + ) 15 + 16 + type RepoClient interface { 17 + ListRecords(ctx context.Context, collection string, limit int, cursor string) ([]RecordRef, string, error) 18 + ApplyWrites(ctx context.Context, collection string, records []PlayRecord) error 19 + DeleteRecord(ctx context.Context, collection, rkey string) error 20 + } 21 + 22 + type RecordRef struct { 23 + URI string 24 + CID string 25 + Value PlayRecord 26 + } 27 + 28 + type RateClient struct { 29 + client *atclient.APIClient 30 + did string 31 + limiter RateLimiter 32 + } 33 + 34 + func NewRateClient(client *atclient.APIClient, did string, limiter RateLimiter) *RateClient { 35 + return &RateClient{ 36 + client: client, 37 + did: did, 38 + limiter: limiter, 39 + } 40 + } 41 + 42 + func (c *RateClient) ListRecords(ctx context.Context, collection string, limit int, cursor string) ([]RecordRef, string, error) { 43 + if c.client == nil { 44 + return nil, "", fmt.Errorf("client cannot be nil") 45 + } 46 + 47 + if c.limiter != nil { 48 + slog.Debug("waiting for rate limit (read)") 49 + if err := c.limiter.AllowRead(ctx); err != nil { 50 + slog.Error("rate limit wait cancelled/failed (read)", logutil.Error(err)) 51 + return nil, "", err 52 + } 53 + } 54 + 55 + var out []RecordRef 56 + 57 + for { 58 + select { 59 + case <-ctx.Done(): 60 + return nil, "", ctx.Err() 61 + default: 62 + } 63 + 64 + var outResp struct { 65 + Records []struct { 66 + URI string `json:"uri"` 67 + CID string `json:"cid"` 68 + Value map[string]any `json:"value"` 69 + } `json:"records"` 70 + Cursor string `json:"cursor"` 71 + } 72 + 73 + err := c.client.Get(ctx, syntax.NSID("com.atproto.repo.listRecords"), map[string]any{ 74 + "repo": c.did, 
75 + "collection": collection, 76 + "limit": limit, 77 + "cursor": cursor, 78 + }, &outResp) 79 + if err != nil { 80 + return nil, "", err 81 + } 82 + 83 + for _, r := range outResp.Records { 84 + var playRecord PlayRecord 85 + if r.Value != nil { 86 + b, err := json.Marshal(r.Value) 87 + if err != nil { 88 + slog.Debug("failed to marshal record value", slog.String("uri", r.URI), logutil.Error(err)) 89 + continue 90 + } 91 + if err := json.Unmarshal(b, &playRecord); err != nil { 92 + slog.Debug("failed to unmarshal record", slog.String("uri", r.URI), logutil.Error(err)) 93 + continue 94 + } 95 + slog.Debug("parsed record", slog.String("uri", r.URI), logutil.Track(playRecord.TrackName, getArtistName(playRecord), playRecord.PlayedTime.Time)) 96 + } 97 + out = append(out, RecordRef{ 98 + URI: r.URI, 99 + CID: r.CID, 100 + Value: playRecord, 101 + }) 102 + } 103 + 104 + if outResp.Cursor == "" || len(outResp.Records) < limit { 105 + break 106 + } 107 + cursor = outResp.Cursor 108 + } 109 + 110 + return out, cursor, nil 111 + } 112 + 113 + func (c *RateClient) ApplyWrites(ctx context.Context, collection string, records []PlayRecord) error { 114 + if len(records) == 0 { 115 + return nil 116 + } 117 + 118 + if c.client == nil { 119 + return fmt.Errorf("client cannot be nil") 120 + } 121 + 122 + if c.limiter != nil { 123 + slog.Debug("waiting for rate limit (write)") 124 + if err := c.limiter.AllowBulkWrite(ctx, len(records)); err != nil { 125 + slog.Error("rate limit wait cancelled/failed (write)", logutil.Error(err)) 126 + return err 127 + } 128 + } 129 + 130 + writes, err := prepareWrites(records, collection) 131 + if err != nil { 132 + return err 133 + } 134 + 135 + err = c.client.Post(ctx, syntax.NSID("com.atproto.repo.applyWrites"), map[string]any{ 136 + "repo": c.did, 137 + "writes": writes, 138 + }, nil) 139 + if err != nil && c.limiter != nil { 140 + c.limiter.RefundBulkWrite(len(records)) 141 + } 142 + return err 143 + } 144 + 145 + func (c *RateClient) 
DeleteRecord(ctx context.Context, collection, rkey string) error { 146 + if c.client == nil { 147 + return fmt.Errorf("client is nil") 148 + } 149 + 150 + if c.limiter != nil { 151 + slog.Debug("waiting for rate limit (delete)") 152 + if err := c.limiter.AllowBulkWrite(ctx, 1); err != nil { 153 + slog.Error("rate limit wait cancelled/failed (delete)", logutil.Error(err)) 154 + return err 155 + } 156 + } 157 + 158 + _, err := c.client.Do(ctx, &atclient.APIRequest{ 159 + Method: "DELETE", 160 + Endpoint: syntax.NSID("com.atproto.repo.deleteRecord"), 161 + QueryParams: map[string][]string{ 162 + "repo": {c.did}, 163 + "collection": {collection}, 164 + "rkey": {rkey}, 165 + }, 166 + }) 167 + if err != nil && c.limiter != nil { 168 + c.limiter.RefundBulkWrite(1) 169 + } 170 + return err 171 + } 172 + 173 + type ExistingRecord struct { 174 + URI string 175 + CID string 176 + Value PlayRecord 177 + } 178 + 179 + func FetchExisting(ctx context.Context, client RepoClient, did string, storage cache.Storage, forceRefresh bool) ([]ExistingRecord, error) { 180 + if !forceRefresh && storage != nil { 181 + published, err := storage.GetPublished(did) 182 + if err == nil && len(published) > 0 && storage.IsValid(did) { 183 + records := make([]ExistingRecord, 0, len(published)) 184 + err := storage.IteratePublished(did, func(key string, data []byte) error { 185 + var value PlayRecord 186 + if err := json.Unmarshal(data, &value); err != nil { 187 + return nil 188 + } 189 + records = append(records, ExistingRecord{ 190 + URI: generateRecordURI(did, value), 191 + Value: value, 192 + }) 193 + return nil 194 + }) 195 + if err == nil { 196 + slog.Debug("loaded from cache", slog.Int("count", len(records))) 197 + return records, nil 198 + } 199 + } 200 + } 201 + 202 + select { 203 + case <-ctx.Done(): 204 + return nil, ctx.Err() 205 + default: 206 + } 207 + 208 + allRecords := make([]ExistingRecord, 0, 1024) 209 + const batchSize = 100 210 + var cursor string 211 + 212 + for { 213 + select { 
214 + case <-ctx.Done(): 215 + return nil, ctx.Err() 216 + default: 217 + } 218 + 219 + records, newCursor, err := client.ListRecords(ctx, RecordType, batchSize, cursor) 220 + if err != nil { 221 + return nil, err 222 + } 223 + 224 + for _, rec := range records { 225 + allRecords = append(allRecords, ExistingRecord(rec)) 226 + } 227 + 228 + if newCursor == "" || len(records) < batchSize { 229 + break 230 + } 231 + cursor = newCursor 232 + } 233 + 234 + if storage != nil { 235 + cacheEntries := make(map[string][]byte) 236 + keys := make([]string, 0, len(allRecords)) 237 + for _, rec := range allRecords { 238 + key := CreateRecordKey(rec.Value) 239 + value, _ := json.Marshal(rec.Value) 240 + cacheEntries[key] = value 241 + keys = append(keys, key) 242 + } 243 + 244 + if err := storage.SaveRecords(did, cacheEntries); err != nil { 245 + return nil, err 246 + } 247 + 248 + // Mark remote records as published locally to prevent redundant syncs 249 + if err := storage.MarkPublished(did, keys...); err != nil { 250 + return nil, err 251 + } 252 + 253 + slog.Debug("saved to cache and marked as published", slog.Int("count", len(allRecords))) 254 + } 255 + 256 + return allRecords, nil 257 + } 258 + 259 + func FilterNew(records []PlayRecord, existing []ExistingRecord, processed map[string]bool) []PlayRecord { 260 + existingKeys := make(map[string]bool) 261 + for _, rec := range existing { 262 + key := CreateRecordKey(rec.Value) 263 + if key == "|||" { 264 + continue 265 + } 266 + existingKeys[key] = true 267 + } 268 + 269 + var newRecords []PlayRecord 270 + for _, record := range records { 271 + key := CreateRecordKey(record) 272 + if !existingKeys[key] && !processed[key] { 273 + newRecords = append(newRecords, record) 274 + } 275 + } 276 + return newRecords 277 + } 278 + 279 + func FindDuplicates(records []ExistingRecord) map[string][]ExistingRecord { 280 + groups := make(map[string][]ExistingRecord) 281 + for _, rec := range records { 282 + key := CreateRecordKey(rec.Value) 
283 + if key == "|||" { 284 + continue 285 + } 286 + groups[key] = append(groups[key], rec) 287 + } 288 + 289 + duplicates := make(map[string][]ExistingRecord) 290 + for key, group := range groups { 291 + if len(group) >= 2 { 292 + duplicates[key] = group 293 + } 294 + } 295 + return duplicates 296 + } 297 + 298 + func prepareWrites(records []PlayRecord, collection string) ([]map[string]any, error) { 299 + if len(records) == 0 { 300 + return nil, nil 301 + } 302 + 303 + writes := make([]map[string]any, len(records)) 304 + usedTIDs := make(map[syntax.TID]int) 305 + 306 + for i, rec := range records { 307 + tid := syntax.NewTIDFromTime(rec.PlayedTime.Time, 0) 308 + for usedTIDs[tid]++; usedTIDs[tid] > 1; { 309 + tid = syntax.NewTIDFromTime(rec.PlayedTime.Time, uint(usedTIDs[tid])) 310 + } 311 + usedTIDs[tid]++ 312 + 313 + writes[i] = map[string]any{ 314 + "$type": "com.atproto.repo.applyWrites#create", 315 + "collection": collection, 316 + "rkey": string(tid), 317 + "value": rec, 318 + } 319 + } 320 + 321 + return writes, nil 322 + } 323 + 324 + func generateRecordURI(did string, record PlayRecord) string { 325 + tid := syntax.NewTIDFromTime(record.PlayedTime.Time, 0) 326 + return fmt.Sprintf("at://%s/%s/%s", did, RecordType, tid) 327 + } 328 + 329 + func AddToCache(did string, records []ExistingRecord, storage cache.Storage) error { 330 + if storage == nil { 331 + return nil 332 + } 333 + newEntries := make(map[string][]byte) 334 + for _, rec := range records { 335 + key := CreateRecordKey(rec.Value) 336 + value, _ := json.Marshal(rec.Value) 337 + newEntries[key] = value 338 + } 339 + return storage.SaveRecords(did, newEntries) 340 + } 341 + 342 + func AddToCacheWithKeys(did string, records []PlayRecord, keys []string, storage cache.Storage) error { 343 + if storage == nil { 344 + return nil 345 + } 346 + if did == "" { 347 + return fmt.Errorf("did cannot be empty") 348 + } 349 + if len(records) != len(keys) { 350 + return fmt.Errorf("records and keys length 
mismatch: %d vs %d", len(records), len(keys)) 351 + } 352 + newEntries := make(map[string][]byte) 353 + for i, rec := range records { 354 + key := keys[i] 355 + value, _ := json.Marshal(rec) 356 + newEntries[key] = value 357 + } 358 + return storage.SaveRecords(did, newEntries) 359 + }
+60
sync/atproto.go
··· 1 + package sync 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "fmt" 7 + "net/http" 8 + "net/url" 9 + "time" 10 + ) 11 + 12 + var UserAgent string 13 + 14 + var httpClient = &http.Client{ 15 + Timeout: 10 * time.Second, 16 + } 17 + 18 + func ResolveMiniDoc(ctx context.Context, identifier string) (did, pds, signingKey string, err error) { 19 + parsedURL, err := url.Parse(SlingshotResolverURL) 20 + if err != nil { 21 + return "", "", "", fmt.Errorf("invalid resolver URL: %w", err) 22 + } 23 + 24 + query := parsedURL.Query() 25 + query.Set("identifier", identifier) 26 + parsedURL.RawQuery = query.Encode() 27 + 28 + req, err := http.NewRequestWithContext(ctx, "GET", parsedURL.String(), nil) 29 + if err != nil { 30 + return "", "", "", fmt.Errorf("failed to create request: %w", err) 31 + } 32 + if UserAgent != "" { 33 + req.Header.Set("User-Agent", UserAgent) 34 + } 35 + 36 + resp, err := httpClient.Do(req) 37 + if err != nil { 38 + return "", "", "", fmt.Errorf("failed to resolve mini doc: %w", err) 39 + } 40 + defer resp.Body.Close() 41 + 42 + if resp.StatusCode != http.StatusOK { 43 + return "", "", "", fmt.Errorf("mini doc resolution failed with status: %d", resp.StatusCode) 44 + } 45 + 46 + var result struct { 47 + DID string `json:"did"` 48 + PDS string `json:"pds"` 49 + SigningKey string `json:"signing_key"` 50 + } 51 + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { 52 + return "", "", "", fmt.Errorf("failed to decode mini doc response: %w", err) 53 + } 54 + 55 + if result.DID == "" { 56 + return "", "", "", fmt.Errorf("resolved mini doc missing DID") 57 + } 58 + 59 + return result.DID, result.PDS, result.SigningKey, nil 60 + }
+105
sync/atproto_auth.go
package sync

import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
	"sync"

	"github.com/bluesky-social/indigo/atproto/atclient"
	"github.com/bluesky-social/indigo/atproto/syntax"
)

// FixedPasswordAuth wraps atclient.PasswordAuth and replaces its DoWithAuth
// and Refresh methods so that the session refresh is sent as a POST request.
// All other methods are those of the embedded type.
//
// lk serializes Refresh calls made through this wrapper.
// NOTE(review): GetTokens is the embedded type's method and does not take lk;
// confirm that token reads are synchronized with the writes in Refresh.
type FixedPasswordAuth struct {
	*atclient.PasswordAuth
	lk sync.RWMutex
}

// DoWithAuth sends req with the current access token as a Bearer credential.
//
// Any response other than a 400 with a JSON content type is returned as is.
// A JSON 400 is decoded as an error body: unless its name is "ExpiredToken"
// it is returned as an API error. For "ExpiredToken" the session is refreshed
// and the request is sent once more with the new access token.
func (a *FixedPasswordAuth) DoWithAuth(c *http.Client, req *http.Request, endpoint syntax.NSID) (*http.Response, error) {
	accessToken, refreshToken := a.GetTokens()
	req.Header.Set("Authorization", "Bearer "+accessToken)
	resp, err := c.Do(req)
	if err != nil {
		return nil, err
	}

	// Success and most errors are handed back to the caller untouched.
	if resp.StatusCode != http.StatusBadRequest || !strings.HasPrefix(resp.Header.Get("Content-Type"), "application/json") {
		return resp, nil
	}

	// From here on the response body is consumed, so it is closed here.
	defer resp.Body.Close()
	var eb atclient.ErrorBody
	if err := json.NewDecoder(resp.Body).Decode(&eb); err != nil {
		return nil, &atclient.APIError{StatusCode: resp.StatusCode}
	}
	if eb.Name != "ExpiredToken" {
		return nil, eb.APIError(resp.StatusCode)
	}

	// Pass the refresh token seen before the request so Refresh can tell
	// whether the session has changed since then.
	if err := a.Refresh(req.Context(), c, refreshToken); err != nil {
		return nil, err
	}

	retry := req.Clone(req.Context())
	// Take a fresh body from GetBody for the retry when the request has one.
	if req.GetBody != nil {
		retry.Body, err = req.GetBody()
		if err != nil {
			return nil, fmt.Errorf("API request retry GetBody failed: %w", err)
		}
	}

	accessToken, _ = a.GetTokens()
	retry.Header.Set("Authorization", "Bearer "+accessToken)
	return c.Do(retry)
}

// Refresh exchanges the stored refresh token for a new token pair by POSTing
// to com.atproto.server.refreshSession on the session host.
//
// If priorRefreshToken is non-empty and differs from the stored refresh
// token, Refresh returns nil without contacting the server (presumably
// because another caller has already refreshed the session). On success the
// stored tokens are replaced and RefreshCallback, if set, is invoked with a
// copy of the session. A non-2xx response is returned as an API error.
func (a *FixedPasswordAuth) Refresh(ctx context.Context, c *http.Client, priorRefreshToken string) error {
	a.lk.Lock()
	defer a.lk.Unlock()

	if priorRefreshToken != "" && priorRefreshToken != a.Session.RefreshToken {
		return nil
	}

	u := a.Session.Host + "/xrpc/com.atproto.server.refreshSession"
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, u, nil)
	if err != nil {
		return err
	}
	req.Header.Set("User-Agent", "indigo-sdk")
	// The refresh endpoint is authorized with the refresh token, not the access token.
	req.Header.Set("Authorization", "Bearer "+a.Session.RefreshToken)

	resp, err := c.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if !(resp.StatusCode >= 200 && resp.StatusCode < 300) {
		var eb atclient.ErrorBody
		if err := json.NewDecoder(resp.Body).Decode(&eb); err != nil {
			return &atclient.APIError{StatusCode: resp.StatusCode}
		}
		return eb.APIError(resp.StatusCode)
	}

	var out struct {
		AccessJwt  string `json:"accessJwt"`
		RefreshJwt string `json:"refreshJwt"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return err
	}

	a.Session.AccessToken = out.AccessJwt
	a.Session.RefreshToken = out.RefreshJwt

	if a.RefreshCallback != nil {
		// The callback gets a copy of the session rather than the live value.
		snapshot := a.Session.Clone()
		a.RefreshCallback(ctx, snapshot)
	}

	return nil
}
+87
sync/atproto_auth_test.go
··· 1 + package sync 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "net/http" 7 + "net/http/httptest" 8 + "strings" 9 + "testing" 10 + 11 + "github.com/bluesky-social/indigo/atproto/atclient" 12 + ) 13 + 14 + func TestFixedPasswordAuth_Refresh(t *testing.T) { 15 + t.Run("Refreshes with POST", func(t *testing.T) { 16 + methodUsed := "" 17 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 18 + if strings.HasSuffix(r.URL.Path, "refreshSession") { 19 + methodUsed = r.Method 20 + w.Header().Set("Content-Type", "application/json") 21 + json.NewEncoder(w).Encode(map[string]string{ 22 + "accessJwt": "new-access", 23 + "refreshJwt": "new-refresh", 24 + }) 25 + return 26 + } 27 + w.WriteHeader(http.StatusNotFound) 28 + })) 29 + defer server.Close() 30 + 31 + pa := &atclient.PasswordAuth{ 32 + Session: atclient.PasswordSessionData{ 33 + Host: server.URL, 34 + AccessToken: "old-access", 35 + RefreshToken: "old-refresh", 36 + }, 37 + } 38 + fixed := &FixedPasswordAuth{PasswordAuth: pa} 39 + 40 + err := fixed.Refresh(context.Background(), http.DefaultClient, "old-refresh") 41 + if err != nil { 42 + t.Fatalf("Refresh failed: %v", err) 43 + } 44 + 45 + if methodUsed != http.MethodPost { 46 + t.Errorf("Expected method POST, got %s", methodUsed) 47 + } 48 + 49 + if pa.Session.AccessToken != "new-access" { 50 + t.Errorf("Expected new access token, got %s", pa.Session.AccessToken) 51 + } 52 + }) 53 + } 54 + 55 + func TestLibraryPasswordAuth_Refresh_Method(t *testing.T) { 56 + // This test documents the library's buggy behavior (GET instead of POST) 57 + methodUsed := "" 58 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 59 + if strings.HasSuffix(r.URL.Path, "refreshSession") { 60 + methodUsed = r.Method 61 + w.Header().Set("Content-Type", "application/json") 62 + json.NewEncoder(w).Encode(map[string]string{ 63 + "accessJwt": "new-access", 64 + "refreshJwt": "new-refresh", 65 + }) 66 + 
return 67 + } 68 + })) 69 + defer server.Close() 70 + 71 + pa := &atclient.PasswordAuth{ 72 + Session: atclient.PasswordSessionData{ 73 + Host: server.URL, 74 + AccessToken: "old-access", 75 + RefreshToken: "old-refresh", 76 + }, 77 + } 78 + 79 + // We call the library's method directly 80 + _ = pa.Refresh(context.Background(), http.DefaultClient, "old-refresh") 81 + 82 + if methodUsed == http.MethodGet { 83 + t.Log("Confirmed: Library uses GET for refreshSession (Buggy)") 84 + } else if methodUsed == http.MethodPost { 85 + t.Log("Library uses POST for refreshSession") 86 + } 87 + }
+121
sync/auth.go
··· 1 + package sync 2 + 3 + import ( 4 + "context" 5 + "fmt" 6 + "sync" 7 + 8 + "github.com/bluesky-social/indigo/atproto/atclient" 9 + ) 10 + 11 + type ResolvedIdentity struct { 12 + DID string `json:"did"` 13 + Handle string `json:"handle"` 14 + PDS string `json:"pds"` 15 + SigningKey string `json:"signingKey"` 16 + } 17 + 18 + type Client struct { 19 + client *atclient.APIClient 20 + resolvedIdentity ResolvedIdentity 21 + mu sync.Mutex 22 + } 23 + 24 + func NewClient(ctx context.Context, handle, password string) (*Client, error) { 25 + identity, err := ResolveIdentity(ctx, handle) 26 + if err != nil { 27 + return nil, fmt.Errorf("failed to resolve identity: %w", err) 28 + } 29 + 30 + pdsURL := identity.PDS 31 + if pdsURL == "" { 32 + pdsURL = "https://bsky.social" 33 + } 34 + 35 + client, err := atclient.LoginWithPasswordHost(ctx, pdsURL, handle, password, "", nil) 36 + if err != nil { 37 + return nil, fmt.Errorf("login failed: %w", err) 38 + } 39 + 40 + if pa, ok := client.Auth.(*atclient.PasswordAuth); ok { 41 + client.Auth = &FixedPasswordAuth{PasswordAuth: pa} 42 + } 43 + 44 + return &Client{ 45 + client: client, 46 + resolvedIdentity: identity, 47 + }, nil 48 + } 49 + 50 + func ResolveIdentity(ctx context.Context, handle string) (ResolvedIdentity, error) { 51 + if handle == "" { 52 + return ResolvedIdentity{}, fmt.Errorf("handle cannot be empty") 53 + } 54 + 55 + did, pds, signingKey, err := ResolveMiniDoc(ctx, handle) 56 + if err != nil { 57 + return ResolvedIdentity{}, fmt.Errorf("failed to resolve identity: %w", err) 58 + } 59 + 60 + if did == "" { 61 + return ResolvedIdentity{}, fmt.Errorf("resolved identity missing DID") 62 + } 63 + 64 + if signingKey == "" { 65 + return ResolvedIdentity{}, fmt.Errorf("resolved identity missing signing key") 66 + } 67 + 68 + return ResolvedIdentity{ 69 + DID: did, 70 + Handle: handle, 71 + PDS: pds, 72 + SigningKey: signingKey, 73 + }, nil 74 + } 75 + 76 + func (c *Client) GetDID() string { 77 + c.mu.Lock() 78 + defer 
c.mu.Unlock() 79 + return c.resolvedIdentity.DID 80 + } 81 + 82 + func (c *Client) GetPDS() string { 83 + c.mu.Lock() 84 + defer c.mu.Unlock() 85 + return c.resolvedIdentity.PDS 86 + } 87 + 88 + func (c *Client) GetHandle() string { 89 + c.mu.Lock() 90 + defer c.mu.Unlock() 91 + return c.resolvedIdentity.Handle 92 + } 93 + 94 + func (c *Client) GetSigningKey() string { 95 + c.mu.Lock() 96 + defer c.mu.Unlock() 97 + return c.resolvedIdentity.SigningKey 98 + } 99 + 100 + func (c *Client) GetAPIClient() *atclient.APIClient { 101 + c.mu.Lock() 102 + defer c.mu.Unlock() 103 + return c.client 104 + } 105 + 106 + func (c *Client) Close() error { 107 + c.mu.Lock() 108 + defer c.mu.Unlock() 109 + if c.client != nil && c.client.Auth != nil { 110 + if logout, ok := c.client.Auth.(*atclient.PasswordAuth); ok { 111 + return logout.Logout(context.Background(), c.client.Client) 112 + } 113 + } 114 + return nil 115 + } 116 + 117 + func (c *Client) HasClient() bool { 118 + c.mu.Lock() 119 + defer c.mu.Unlock() 120 + return c.client != nil 121 + }
+73
sync/config.go
package sync

import (
	"time"
)

// Package-wide defaults and limits.
const (
	// RecordType is the collection / record type used for play records.
	RecordType = "fm.teal.alpha.feed.play"
	// DefaultBatchSize is the default number of records per batch.
	DefaultBatchSize = 20
	// DefaultBatchDelay is the default pause between batches.
	DefaultBatchDelay = 2000 * time.Millisecond
	// MinBatchDelay is the shortest batch delay.
	// NOTE(review): where this floor is enforced is not visible here.
	MinBatchDelay = 1000 * time.Millisecond
	// DefaultCrossSourceTolerance is the default time tolerance across sources.
	// NOTE(review): presumably the window for matching the same play in
	// Last.fm and Spotify data — confirm against the merge code.
	DefaultCrossSourceTolerance = 5 * time.Minute
	// CrossSourceTolerance is an alias of DefaultCrossSourceTolerance.
	CrossSourceTolerance = DefaultCrossSourceTolerance
	// CacheTTL is the cache lifetime default.
	CacheTTL = 24 * time.Hour
	// CacheVersion is the cache format version default.
	CacheVersion = 1
	// SlingshotResolverURL is the identity resolver endpoint.
	SlingshotResolverURL = "https://slingshot.microcosm.blue/xrpc/com.bad-example.identity.resolveMiniDoc"
	// MaxRetryDelay, BaseRetryDelay and MaxRetries are retry limits.
	// NOTE(review): the retry loop that uses them is not visible here.
	MaxRetryDelay  = 15 * time.Minute
	BaseRetryDelay = 2 * time.Second
	MaxRetries     = 1000
)

// ImportMode selects which sources an import reads.
type ImportMode string

// Supported import modes.
const (
	ImportModeLastFM   ImportMode = "lastfm"
	ImportModeSpotify  ImportMode = "spotify"
	ImportModeCombined ImportMode = "combined"
	ImportModeSync     ImportMode = "sync"
)

// Config bundles the tunable settings; every field has a JSON name.
type Config struct {
	RecordType           string        `json:"recordType"`
	ClientAgent          string        `json:"clientAgent"`
	BatchSize            int           `json:"batchSize"`
	BatchDelay           time.Duration `json:"batchDelay"`
	CrossSourceTolerance time.Duration `json:"crossSourceTolerance"`
	CacheTTL             time.Duration `json:"cacheTTL"`
	CacheVersion         int           `json:"cacheVersion"`
	SlingshotResolverURL string        `json:"slingshotResolverURL"`
	ImportMode           ImportMode    `json:"importMode"`
	UserAgent            string        `json:"userAgent"`
}

// DefaultConfig is a Config filled from the package constants. UserAgent is
// left empty.
// NOTE(review): ImportMode defaults to "lastfm" here while the README lists
// "combined" as the default mode — confirm which one the CLI applies.
var DefaultConfig = Config{
	RecordType:           RecordType,
	ClientAgent:          ClientAgent,
	BatchSize:            DefaultBatchSize,
	BatchDelay:           DefaultBatchDelay,
	CrossSourceTolerance: CrossSourceTolerance,
	CacheTTL:             CacheTTL,
	CacheVersion:         CacheVersion,
	SlingshotResolverURL: SlingshotResolverURL,
	ImportMode:           ImportModeLastFM,
}

// PublishResult summarizes one publish run.
type PublishResult struct {
	SuccessCount     int           `json:"successCount"`
	ErrorCount       int           `json:"errorCount"`
	Cancelled        bool          `json:"cancelled"`
	Duration         time.Duration `json:"duration"`
	TotalRecords     int           `json:"totalRecords"`
	RecordsPerMinute float64       `json:"recordsPerMinute"`
	FirstRecordTime  time.Time     `json:"firstRecordTime"`
	LastRecordTime   time.Time     `json:"lastRecordTime"`
}

// Errored reports whether at least one error was counted.
func (r *PublishResult) Errored() bool {
	return r.ErrorCount > 0
}

// AllSuccessful reports whether the run had no errors and was not cancelled.
func (r *PublishResult) AllSuccessful() bool {
	return r.ErrorCount == 0 && !r.Cancelled
}
+91
sync/import.go
··· 1 + package sync 2 + 3 + import ( 4 + "archive/zip" 5 + "context" 6 + "fmt" 7 + "io" 8 + "io/fs" 9 + "os" 10 + "strings" 11 + "time" 12 + ) 13 + 14 + type Parser interface { 15 + ParseFile(ctx context.Context, r io.Reader) ([]PlayRecord, error) 16 + ParseFS(ctx context.Context, fsys fs.FS) ([]PlayRecord, error) 17 + } 18 + 19 + func ParseInput(ctx context.Context, path string, parser Parser) ([]PlayRecord, error) { 20 + if path == "" { 21 + return nil, nil 22 + } 23 + 24 + info, err := os.Stat(path) 25 + if err != nil { 26 + return nil, fmt.Errorf("stat path: %w", err) 27 + } 28 + 29 + if info.IsDir() { 30 + return parser.ParseFS(ctx, os.DirFS(path)) 31 + } 32 + 33 + if strings.HasSuffix(path, ".zip") { 34 + zf, err := zip.OpenReader(path) 35 + if err != nil { 36 + return nil, fmt.Errorf("open zip: %w", err) 37 + } 38 + defer zf.Close() 39 + return parser.ParseFS(ctx, zf) 40 + } 41 + 42 + file, err := os.Open(path) 43 + if err != nil { 44 + return nil, fmt.Errorf("open file: %w", err) 45 + } 46 + defer file.Close() 47 + return parser.ParseFile(ctx, file) 48 + } 49 + 50 + type ImportOptions struct { 51 + LastFMPath string 52 + SpotifyPath string 53 + Mode ImportMode 54 + Tolerance time.Duration 55 + 56 + LastFMParser Parser 57 + SpotifyParser Parser 58 + } 59 + 60 + func LoadRecordsForImport(ctx context.Context, opts ImportOptions) ([]PlayRecord, int, error) { 61 + var lastfmRecords, spotifyRecords []PlayRecord 62 + var err error 63 + 64 + if opts.Mode == ImportModeLastFM || opts.Mode == ImportModeCombined { 65 + lastfmRecords, err = ParseInput(ctx, opts.LastFMPath, opts.LastFMParser) 66 + if err != nil { 67 + return nil, 0, fmt.Errorf("parse lastfm: %w", err) 68 + } 69 + } 70 + 71 + if opts.Mode == ImportModeSpotify || opts.Mode == ImportModeCombined { 72 + spotifyRecords, err = ParseInput(ctx, opts.SpotifyPath, opts.SpotifyParser) 73 + if err != nil { 74 + return nil, 0, fmt.Errorf("parse spotify: %w", err) 75 + } 76 + } 77 + 78 + totalInput := 
len(lastfmRecords) + len(spotifyRecords) 79 + 80 + var mergedRecords []PlayRecord 81 + switch opts.Mode { 82 + case ImportModeCombined: 83 + mergedRecords, _ = MergeRecords(lastfmRecords, spotifyRecords, opts.Tolerance) 84 + case ImportModeLastFM: 85 + mergedRecords = lastfmRecords 86 + default: 87 + mergedRecords = spotifyRecords 88 + } 89 + 90 + return mergedRecords, totalInput, nil 91 + }
+152
sync/import_test.go
··· 1 + package sync_test 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "io" 7 + "io/fs" 8 + "testing" 9 + "time" 10 + 11 + "github.com/bluesky-social/indigo/atproto/atclient" 12 + "tangled.org/karitham.dev/lazuli/cache" 13 + "tangled.org/karitham.dev/lazuli/sync" 14 + ) 15 + 16 + type mockParser struct { 17 + records []sync.PlayRecord 18 + } 19 + 20 + func (m *mockParser) ParseFile(ctx context.Context, r io.Reader) ([]sync.PlayRecord, error) { 21 + return m.records, nil 22 + } 23 + 24 + func (m *mockParser) ParseFS(ctx context.Context, fsys fs.FS) ([]sync.PlayRecord, error) { 25 + return m.records, nil 26 + } 27 + 28 + type mockRepoClient struct { 29 + records []sync.RecordRef 30 + deleted []string 31 + applied []sync.PlayRecord 32 + } 33 + 34 + func (m *mockRepoClient) ListRecords(ctx context.Context, collection string, limit int, cursor string) ([]sync.RecordRef, string, error) { 35 + return m.records, "", nil 36 + } 37 + 38 + func (m *mockRepoClient) ApplyWrites(ctx context.Context, collection string, records []sync.PlayRecord) error { 39 + m.applied = append(m.applied, records...) 40 + return nil 41 + } 42 + 43 + func (m *mockRepoClient) DeleteRecord(ctx context.Context, collection, rkey string) error { 44 + m.deleted = append(m.deleted, rkey) 45 + return nil 46 + } 47 + 48 + type mockAuthClient struct { 49 + did string 50 + } 51 + 52 + func (m *mockAuthClient) GetAPIClient() *atclient.APIClient { return nil } 53 + func (m *mockAuthClient) GetDID() string { return m.did } 54 + 55 + func TestImportE2E(t *testing.T) { 56 + ctx := context.Background() 57 + did := "did:plc:test" 58 + 59 + // 1. Setup Storage 60 + storage, err := cache.NewBoltStorage() 61 + if err != nil { 62 + t.Fatal(err) 63 + } 64 + defer storage.Close() 65 + defer storage.ClearAll() 66 + 67 + // 2. 
Mock Data 68 + t1 := time.Date(2023, 1, 1, 12, 0, 0, 0, time.UTC) 69 + t2 := time.Date(2023, 1, 1, 12, 0, 5, 0, time.UTC) // Within tolerance (5s) 70 + t3 := time.Date(2023, 1, 2, 12, 0, 0, 0, time.UTC) // New record 71 + 72 + rec1 := sync.PlayRecord{TrackName: "Song A", PlayedTime: sync.Timestamp{Time: t1}} 73 + rec2 := sync.PlayRecord{TrackName: "Song A", PlayedTime: sync.Timestamp{Time: t2}} 74 + rec3 := sync.PlayRecord{TrackName: "Song B", PlayedTime: sync.Timestamp{Time: t3}} 75 + 76 + // 3. Load Records 77 + opts := sync.ImportOptions{ 78 + Mode: sync.ImportModeCombined, 79 + Tolerance: 10 * time.Second, 80 + LastFMParser: &mockParser{records: []sync.PlayRecord{rec1}}, 81 + SpotifyParser: &mockParser{records: []sync.PlayRecord{rec2, rec3}}, 82 + LastFMPath: "import_test.go", // Use existing file to pass Stat 83 + SpotifyPath: "import_test.go", 84 + } 85 + 86 + records, total, err := sync.LoadRecordsForImport(ctx, opts) 87 + if err != nil { 88 + t.Fatal(err) 89 + } 90 + 91 + if total != 3 { 92 + t.Errorf("expected 3 total records, got %d", total) 93 + } 94 + if len(records) != 2 { 95 + t.Errorf("expected 2 merged records, got %d", len(records)) 96 + } 97 + 98 + // 4. Save to storage (as if we just imported them) 99 + newEntries := make(map[string][]byte) 100 + for _, rec := range records { 101 + key := sync.CreateRecordKey(rec) 102 + val, _ := json.Marshal(rec) 103 + newEntries[key] = val 104 + } 105 + if err := storage.SaveRecords(did, newEntries); err != nil { 106 + t.Fatal(err) 107 + } 108 + 109 + // 5. Mock ATProto Client 110 + mockRepo := &mockRepoClient{ 111 + records: []sync.RecordRef{ 112 + {Value: rec1}, // Already exists on remote 113 + }, 114 + } 115 + 116 + // 6. Fetch Existing (Deduplicate) 117 + existing, err := sync.FetchExisting(ctx, mockRepo, did, storage, false) 118 + if err != nil { 119 + t.Fatal(err) 120 + } 121 + 122 + if len(existing) != 1 { 123 + t.Errorf("expected 1 existing record, got %d", len(existing)) 124 + } 125 + 126 + // 7. 
Publish 127 + limiter := sync.NewRateLimiter(storage) 128 + publishOpts := sync.PublishOptions{ 129 + BatchSize: 10, 130 + ATProtoClient: mockRepo, 131 + Storage: storage, 132 + } 133 + 134 + auth := &mockAuthClient{did: did} 135 + result := sync.Publish(ctx, auth, publishOpts, limiter) 136 + 137 + if result.SuccessCount != 1 { 138 + t.Errorf("expected 1 successful publish, got %d", result.SuccessCount) 139 + } 140 + if len(mockRepo.applied) != 1 { 141 + t.Errorf("expected 1 record applied to repo, got %d", len(mockRepo.applied)) 142 + } 143 + if mockRepo.applied[0].TrackName != "Song B" { 144 + t.Errorf("expected Song B to be published, got %s", mockRepo.applied[0].TrackName) 145 + } 146 + 147 + // 8. Verify storage state 148 + stats, _ := storage.Stats() 149 + if stats.UnpublishedCount != 0 { 150 + t.Errorf("expected 0 unpublished records, got %d", stats.UnpublishedCount) 151 + } 152 + }
+47
sync/logutil/logutil.go
··· 1 + package logutil 2 + 3 + import ( 4 + "errors" 5 + "log/slog" 6 + "time" 7 + 8 + "github.com/bluesky-social/indigo/atproto/atclient" 9 + ) 10 + 11 + type TrackInfo struct { 12 + Name string 13 + Artist string 14 + PlayedAt time.Time 15 + } 16 + 17 + // Track returns slog attributes for a play record. 18 + func Track(name, artist string, playedAt time.Time) slog.Attr { 19 + return slog.Group("track", 20 + slog.String("name", name), 21 + slog.String("artist", artist), 22 + slog.Time("played_at", playedAt), 23 + ) 24 + } 25 + 26 + // DID returns a slog attribute for a DID. 27 + func DID(did string) slog.Attr { 28 + return slog.String("did", did) 29 + } 30 + 31 + // Error returns a slog attribute for an error. 32 + func Error(err error) slog.Attr { 33 + if err == nil { 34 + return slog.Attr{} 35 + } 36 + 37 + var apiErr *atclient.APIError 38 + if errors.As(err, &apiErr) { 39 + return slog.Group("error", 40 + slog.Int("status", apiErr.StatusCode), 41 + slog.String("name", apiErr.Name), 42 + slog.String("message", apiErr.Message), 43 + ) 44 + } 45 + 46 + return slog.String("error", err.Error()) 47 + }
+241
sync/progress.go
··· 1 + package sync 2 + 3 + import ( 4 + "encoding/json" 5 + "fmt" 6 + "os" 7 + "path/filepath" 8 + "sync" 9 + "time" 10 + ) 11 + 12 + const StateDir = "~/.lazuli/state" 13 + 14 + type SyncState struct { 15 + LastProcessedTime time.Time `json:"lastProcessedTime"` 16 + LastProcessedKey string `json:"lastProcessedKey"` 17 + CompletedAt *time.Time `json:"completedAt,omitempty"` 18 + StartedAt time.Time `json:"startedAt"` 19 + TotalRecords int `json:"totalRecords"` 20 + ImportedRecords int `json:"importedRecords"` 21 + FailedRecords int `json:"failedRecords"` 22 + } 23 + 24 + func stateDir() (string, error) { 25 + dir := os.Getenv("LAZULI_STATE_DIR") 26 + if dir != "" { 27 + return dir, nil 28 + } 29 + home, err := os.UserHomeDir() 30 + if err != nil { 31 + return "", err 32 + } 33 + return filepath.Join(home, ".lazuli", "state"), nil 34 + } 35 + 36 + func statePath(did string) (string, error) { 37 + sanitized := did 38 + sanitized = filepath.FromSlash(sanitized) 39 + sanitized = filepath.Clean(sanitized) 40 + return sanitized + ".json", nil 41 + } 42 + 43 + func LoadSyncState(did string) (*SyncState, error) { 44 + dir, err := stateDir() 45 + if err != nil { 46 + return nil, err 47 + } 48 + 49 + path, err := statePath(did) 50 + if err != nil { 51 + return nil, err 52 + } 53 + 54 + fullPath := filepath.Join(dir, path) 55 + data, err := os.ReadFile(fullPath) 56 + if err != nil { 57 + if os.IsNotExist(err) { 58 + return &SyncState{StartedAt: time.Now()}, nil 59 + } 60 + return nil, err 61 + } 62 + 63 + var state SyncState 64 + if err := json.Unmarshal(data, &state); err != nil { 65 + return nil, err 66 + } 67 + 68 + return &state, nil 69 + } 70 + 71 + func SaveSyncState(did string, state *SyncState) error { 72 + dir, err := stateDir() 73 + if err != nil { 74 + return err 75 + } 76 + 77 + if err := os.MkdirAll(dir, 0o755); err != nil { 78 + return err 79 + } 80 + 81 + path, err := statePath(did) 82 + if err != nil { 83 + return err 84 + } 85 + 86 + data, err := 
json.MarshalIndent(state, "", " ") 87 + if err != nil { 88 + return err 89 + } 90 + 91 + return os.WriteFile(filepath.Join(dir, path), data, 0o644) 92 + } 93 + 94 + func ClearSyncState(did string) error { 95 + dir, err := stateDir() 96 + if err != nil { 97 + return err 98 + } 99 + 100 + path, err := statePath(did) 101 + if err != nil { 102 + return err 103 + } 104 + 105 + fullPath := filepath.Join(dir, path) 106 + if _, err := os.Stat(fullPath); os.IsNotExist(err) { 107 + return nil 108 + } 109 + 110 + return os.Remove(fullPath) 111 + } 112 + 113 + type ProgressTracker struct { 114 + Total int 115 + Completed int 116 + Errors int 117 + StartTime time.Time 118 + LastLogTime time.Time 119 + mu sync.Mutex 120 + 121 + LogInterval time.Duration 122 + LogRecordsMetric int 123 + } 124 + 125 + func NewProgressTracker(total int) *ProgressTracker { 126 + return &ProgressTracker{ 127 + Total: total, 128 + StartTime: time.Now(), 129 + LastLogTime: time.Now(), 130 + LogInterval: 30 * time.Second, 131 + LogRecordsMetric: 1000, 132 + } 133 + } 134 + 135 + func (t *ProgressTracker) Increment(completed int) { 136 + t.mu.Lock() 137 + defer t.mu.Unlock() 138 + t.Completed += completed 139 + } 140 + 141 + func (t *ProgressTracker) IncrementErrors(n int) { 142 + t.mu.Lock() 143 + defer t.mu.Unlock() 144 + t.Errors += n 145 + } 146 + 147 + func (t *ProgressTracker) Progress() (percent float64, eta time.Duration, elapsed time.Duration, rate string) { 148 + t.mu.Lock() 149 + defer t.mu.Unlock() 150 + 151 + elapsed = time.Since(t.StartTime) 152 + if t.Completed == 0 { 153 + return 0, 0, elapsed, "0 rec/min" 154 + } 155 + 156 + perMin := float64(t.Completed) / elapsed.Minutes() 157 + rate = formatRate(perMin) 158 + 159 + if t.Total == 0 { 160 + return 100, 0, elapsed, rate 161 + } 162 + 163 + percent = float64(t.Completed) / float64(t.Total) * 100 164 + if percent >= 100 { 165 + return 100, 0, elapsed, rate 166 + } 167 + 168 + remainingRecords := float64(t.Total - t.Completed) 169 + 
remainingMinutes := remainingRecords / perMin 170 + eta = time.Duration(remainingMinutes*60) * time.Second 171 + return percent, eta, elapsed, rate 172 + } 173 + 174 + func (t *ProgressTracker) ShouldLog() bool { 175 + t.mu.Lock() 176 + defer t.mu.Unlock() 177 + 178 + if t.Completed == 0 { 179 + return false 180 + } 181 + 182 + now := time.Now() 183 + if now.Sub(t.LastLogTime) >= t.LogInterval { 184 + t.LastLogTime = now 185 + return true 186 + } 187 + if t.Completed%t.LogRecordsMetric == 0 { 188 + return true 189 + } 190 + return false 191 + } 192 + 193 + type ProgressReport struct { 194 + Total int `json:"total"` 195 + Completed int `json:"completed"` 196 + Percent float64 `json:"percent"` 197 + Errors int `json:"errors"` 198 + Elapsed string `json:"elapsed"` 199 + ETA string `json:"eta,omitempty"` 200 + Rate string `json:"rate"` 201 + } 202 + 203 + func (t *ProgressTracker) Report() ProgressReport { 204 + percent, eta, elapsed, rate := t.Progress() 205 + etaStr := "" 206 + if eta > 0 { 207 + etaStr = FormatDuration(eta) 208 + } 209 + return ProgressReport{ 210 + Total: t.Total, 211 + Completed: t.Completed, 212 + Percent: percent, 213 + Errors: t.Errors, 214 + Elapsed: elapsed.Round(time.Second).String(), 215 + ETA: etaStr, 216 + Rate: rate, 217 + } 218 + } 219 + 220 + func formatRate(perMin float64) string { 221 + if perMin >= 1000 { 222 + return fmt.Sprintf("%.1fk/min", perMin/1000) 223 + } 224 + return fmt.Sprintf("%.0f/min", perMin) 225 + } 226 + 227 + func FormatDuration(d time.Duration) string { 228 + if d == 0 { 229 + return "done" 230 + } 231 + hours := int(d.Hours()) 232 + minutes := int(d.Minutes()) % 60 233 + seconds := int(d.Seconds()) % 60 234 + if hours > 0 { 235 + return fmt.Sprintf("%dh %dm", hours, minutes) 236 + } 237 + if minutes > 0 { 238 + return fmt.Sprintf("%dm %ds", minutes, seconds) 239 + } 240 + return fmt.Sprintf("%ds", seconds) 241 + }
+428
sync/publish.go
··· 1 + package sync 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "errors" 7 + "fmt" 8 + "log/slog" 9 + "math/rand" 10 + "time" 11 + 12 + "github.com/bluesky-social/indigo/atproto/atclient" 13 + "github.com/bluesky-social/indigo/atproto/syntax" 14 + 15 + "tangled.org/karitham.dev/lazuli/cache" 16 + "tangled.org/karitham.dev/lazuli/sync/logutil" 17 + ) 18 + 19 + type PublishOptions struct { 20 + BatchSize int 21 + BatchDelay time.Duration 22 + DryRun bool 23 + ATProtoClient ATProtoClient 24 + ProgressLog func(ProgressReport) 25 + Storage cache.Storage 26 + } 27 + 28 + func Publish(ctx context.Context, client AuthClient, opts PublishOptions, limiter RateLimiter) PublishResult { 29 + startTime := time.Now() 30 + 31 + batchSize := defaultBatchSize(opts.BatchSize) 32 + batchDelay := defaultBatchDelay(opts.BatchDelay) 33 + 34 + atprotoClient, err := buildClient(client, opts.ATProtoClient) 35 + if err != nil { 36 + return PublishResult{ 37 + SuccessCount: 0, 38 + ErrorCount: 0, 39 + Cancelled: false, 40 + Duration: time.Since(startTime), 41 + TotalRecords: 0, 42 + } 43 + } 44 + 45 + totalRecords := 0 46 + if opts.Storage != nil { 47 + _ = opts.Storage.IterateUnpublished(client.GetDID(), func(key string, rec []byte) error { 48 + totalRecords++ 49 + return nil 50 + }) 51 + } 52 + 53 + if totalRecords == 0 { 54 + return PublishResult{} 55 + } 56 + 57 + slog.Info("starting iterative import", 58 + slog.Int("total_records", totalRecords), 59 + slog.Int("batch_size", batchSize), 60 + slog.Duration("batch_delay", batchDelay), 61 + slog.Int("daily_write_limit", WriteLimitDay), 62 + slog.Int("daily_token_limit", GlobalLimitDay), 63 + slog.String("rate_limit", fmt.Sprintf("1 write per %.1fs", 86400.0/WriteLimitDay))) 64 + 65 + tracker := NewProgressTracker(totalRecords) 66 + progressLog := defaultProgressLog(opts.ProgressLog) 67 + totalSuccess := 0 68 + totalErrors := 0 69 + 70 + var batch []PlayRecord 71 + var batchKeys []string 72 + 73 + processBatch := func() error { 74 
+ if len(batch) == 0 { 75 + return nil 76 + } 77 + 78 + if opts.DryRun { 79 + for _, r := range batch { 80 + tid := syntax.NewTIDFromTime(r.PlayedTime.Time, 0) 81 + slog.Info("would publish record (dry run)", 82 + logutil.Track(r.TrackName, getArtistName(r), r.PlayedTime.Time), 83 + slog.String("rkey", string(tid))) 84 + } 85 + totalSuccess += len(batch) 86 + tracker.Increment(len(batch)) 87 + batch = batch[:0] 88 + batchKeys = batchKeys[:0] 89 + return nil 90 + } 91 + 92 + if err := limiter.AllowBulkWrite(ctx, len(batch)); err != nil { 93 + slog.Error("rate limit wait failed", logutil.Error(err)) 94 + return err 95 + } 96 + 97 + var lastResult BatchResult 98 + did := client.GetDID() 99 + attempt := 0 100 + for { 101 + w, g := limiter.Stats() 102 + lastResult = PublishBatch(ctx, atprotoClient, did, batch, w, g, opts.Storage) 103 + 104 + if lastResult.ErrorCount == 0 { 105 + break 106 + } 107 + 108 + limiter.RefundBulkWrite(len(batch)) 109 + 110 + attempt++ 111 + var apiErr *atclient.APIError 112 + is500 := errors.As(lastResult.LastError, &apiErr) && apiErr.StatusCode >= 500 113 + 114 + if is500 || attempt >= MaxRetries { 115 + if is500 { 116 + first := batch[0] 117 + last := batch[len(batch)-1] 118 + slog.Error("batch failed with 500 error, marking as failed and moving on", 119 + logutil.Error(lastResult.LastError), 120 + slog.Int("count", len(batch)), 121 + slog.Group("range", 122 + slog.Attr(logutil.Track(first.TrackName, getArtistName(first), first.PlayedTime.Time)), 123 + slog.Attr(logutil.Track(last.TrackName, getArtistName(last), last.PlayedTime.Time)))) 124 + } else { 125 + slog.Error("batch failed after max retries", slog.Int("errorCount", lastResult.ErrorCount)) 126 + } 127 + 128 + if opts.Storage != nil { 129 + errMsg := lastResult.LastError.Error() 130 + if err := opts.Storage.MarkFailed(did, batchKeys, errMsg); err != nil { 131 + slog.Error("failed to mark records as failed", logutil.Error(err)) 132 + } 133 + } 134 + break 135 + } 136 + 137 + delay := 
backoff(attempt) 138 + slog.Warn("batch failed, retrying with backoff", 139 + slog.Int("errorCount", lastResult.ErrorCount), 140 + slog.Duration("retryDelay", delay), 141 + logutil.Error(lastResult.LastError), 142 + slog.Int("attempt", attempt)) 143 + 144 + if !waitForRetry(ctx, delay) { 145 + return ctx.Err() 146 + } 147 + 148 + if err := limiter.AllowBulkWrite(ctx, len(batch)); err != nil { 149 + return err 150 + } 151 + } 152 + 153 + totalSuccess += lastResult.SuccessCount 154 + totalErrors += lastResult.ErrorCount 155 + tracker.Increment(lastResult.SuccessCount) 156 + tracker.IncrementErrors(lastResult.ErrorCount) 157 + 158 + if lastResult.SuccessCount > 0 && opts.Storage != nil { 159 + keys := makeRecordKeys(batch[:lastResult.SuccessCount]) 160 + if err := opts.Storage.MarkPublished(did, keys...); err != nil { 161 + slog.Error("failed to mark records as published", logutil.Error(err)) 162 + } 163 + } 164 + 165 + if tracker.ShouldLog() { 166 + progressLog(tracker.Report()) 167 + } 168 + 169 + batch = batch[:0] 170 + batchKeys = batchKeys[:0] 171 + return nil 172 + } 173 + 174 + err = opts.Storage.IterateUnpublished(client.GetDID(), func(key string, rec []byte) error { 175 + select { 176 + case <-ctx.Done(): 177 + return ctx.Err() 178 + default: 179 + } 180 + 181 + var record PlayRecord 182 + if err := json.Unmarshal(rec, &record); err != nil { 183 + return nil // skip malformed 184 + } 185 + 186 + batch = append(batch, record) 187 + batchKeys = append(batchKeys, key) 188 + 189 + if len(batch) >= batchSize { 190 + if err := processBatch(); err != nil { 191 + return err 192 + } 193 + } 194 + return nil 195 + }) 196 + 197 + if err == nil && len(batch) > 0 { 198 + err = processBatch() 199 + } 200 + 201 + cancelled := false 202 + if err != nil { 203 + slog.Error("import interrupted", logutil.Error(err)) 204 + cancelled = true 205 + } 206 + 207 + logResult(totalSuccess, totalErrors, startTime) 208 + return newPublishResult(totalSuccess, totalErrors, totalRecords, 
startTime, cancelled) 209 + } 210 + 211 + // waitForRetry waits for the specified duration, returning false if context is cancelled. 212 + func waitForRetry(ctx context.Context, delay time.Duration) bool { 213 + select { 214 + case <-time.After(delay): 215 + return true 216 + case <-ctx.Done(): 217 + slog.Debug("retry cancelled due to context done") 218 + return false 219 + } 220 + } 221 + 222 + func defaultProgressLog(f func(ProgressReport)) func(ProgressReport) { 223 + if f != nil { 224 + return f 225 + } 226 + return func(pr ProgressReport) { 227 + slog.Info("sync progress", 228 + slog.Int("completed", pr.Completed), 229 + slog.Int("total", pr.Total), 230 + slog.Float64("percent", pr.Percent), 231 + slog.String("elapsed", pr.Elapsed), 232 + slog.String("eta", pr.ETA), 233 + slog.String("rate", pr.Rate), 234 + slog.Int("errors", pr.Errors), 235 + ) 236 + } 237 + } 238 + 239 + func defaultBatchSize(size int) int { 240 + if size > 0 { 241 + return size 242 + } 243 + return DefaultBatchSize 244 + } 245 + 246 + func defaultBatchDelay(delay time.Duration) time.Duration { 247 + if delay > 0 { 248 + return delay 249 + } 250 + return DefaultBatchDelay 251 + } 252 + 253 + func buildClient(client AuthClient, customClient ATProtoClient) (ATProtoClient, error) { 254 + if customClient != nil { 255 + return customClient, nil 256 + } 257 + 258 + apiClient := client.GetAPIClient() 259 + if apiClient == nil { 260 + slog.Error("failed to get API client", logutil.Error(fmt.Errorf("client is nil"))) 261 + return nil, fmt.Errorf("API client is nil") 262 + } 263 + 264 + return &atprotoClientAdapter{client: apiClient, did: client.GetDID()}, nil 265 + } 266 + 267 + func newPublishResult(success, errors, total int, start time.Time, cancelled bool) PublishResult { 268 + return PublishResult{ 269 + SuccessCount: success, 270 + ErrorCount: errors, 271 + Cancelled: cancelled, 272 + Duration: time.Since(start), 273 + TotalRecords: total, 274 + RecordsPerMinute: ratePerMinute(success, 
time.Since(start)), 275 + } 276 + } 277 + 278 + func makeRecordKeys(records []PlayRecord) []string { 279 + keys := make([]string, 0, len(records)) 280 + for _, record := range records { 281 + keys = append(keys, CreateRecordKey(record)) 282 + } 283 + return keys 284 + } 285 + 286 + func logResult(success, errors int, startTime time.Time) { 287 + if errors > 0 { 288 + slog.Warn("import completed with errors", 289 + slog.Int("success", success), 290 + slog.Int("errors", errors)) 291 + } 292 + slog.Info("import completed", 293 + slog.Int("success", success), 294 + slog.Int("errors", errors), 295 + slog.Duration("duration", time.Since(startTime)), 296 + slog.String("rate", formatRate(ratePerMinute(success, time.Since(startTime))))) 297 + } 298 + 299 + func backoff(attempt int) time.Duration { 300 + if attempt <= 0 { 301 + return BaseRetryDelay 302 + } 303 + 304 + // Calculate exponential delay: BaseRetryDelay * 2^(attempt-1) 305 + // We use uint(attempt-1) because 1<<0 is 1 (for first retry) 306 + exp := min(attempt-1, 31) 307 + 308 + delay := BaseRetryDelay * time.Duration(1<<uint(exp)) 309 + 310 + // Cap the delay before adding jitter 311 + if delay > MaxRetryDelay || delay <= 0 { 312 + delay = MaxRetryDelay 313 + } 314 + 315 + // Add up to 25% jitter 316 + var jitter time.Duration 317 + if delay > 4 { 318 + jitter = time.Duration(rand.Int63n(int64(delay / 4))) 319 + } 320 + 321 + return delay + jitter 322 + } 323 + 324 + type BatchResult struct { 325 + SuccessCount int 326 + ErrorCount int 327 + FailedRecords []PlayRecord 328 + LastError error 329 + } 330 + 331 + func PublishBatch(ctx context.Context, client ATProtoClient, did string, batch []PlayRecord, consumedW, consumedG int, storage cache.Storage) BatchResult { 332 + if len(batch) == 0 { 333 + return BatchResult{} 334 + } 335 + 336 + atprotoRecords := prepareRecords(batch) 337 + 338 + err := client.ApplyWrites(ctx, RecordType, atprotoRecords) 339 + if err != nil { 340 + slog.Error("batch publish failed", 
logutil.Error(err)) 341 + return BatchResult{ErrorCount: len(atprotoRecords), FailedRecords: atprotoRecords, LastError: err} 342 + } 343 + 344 + logBatch(atprotoRecords, consumedW, consumedG) 345 + 346 + if storage != nil && did != "" { 347 + keys := makeRecordKeys(atprotoRecords) 348 + cacheEntries := make(map[string][]byte) 349 + for i, rec := range atprotoRecords { 350 + key := keys[i] 351 + value, _ := json.Marshal(rec) 352 + cacheEntries[key] = value 353 + } 354 + if err := storage.SaveRecords(did, cacheEntries); err != nil { 355 + slog.Debug("failed to add records to cache", logutil.Error(err)) 356 + } 357 + } 358 + 359 + return BatchResult{SuccessCount: len(atprotoRecords)} 360 + } 361 + 362 + func prepareRecords(batch []PlayRecord) []PlayRecord { 363 + atprotoRecords := make([]PlayRecord, 0, len(batch)) 364 + for _, record := range batch { 365 + record.Type = RecordType 366 + record.SubmissionClientAgent = ClientAgent 367 + atprotoRecords = append(atprotoRecords, record) 368 + } 369 + return atprotoRecords 370 + } 371 + 372 + func logBatch(atprotoRecords []PlayRecord, consumedW, consumedG int) { 373 + first := atprotoRecords[0] 374 + last := atprotoRecords[len(atprotoRecords)-1] 375 + slog.Debug("batch published", 376 + slog.Int("records", len(atprotoRecords)), 377 + slog.Int("writes_consumed", consumedW), 378 + slog.Int("writes_limit", WriteLimitDay), 379 + slog.Int("writes_remaining", WriteLimitDay-consumedW), 380 + slog.Int("global_consumed", consumedG), 381 + slog.Int("global_limit", GlobalLimitDay), 382 + slog.Int("global_remaining", GlobalLimitDay-consumedG), 383 + logutil.Track(first.TrackName, getArtistName(first), first.PlayedTime.Time), 384 + logutil.Track(last.TrackName, getArtistName(last), last.PlayedTime.Time)) 385 + } 386 + 387 + func getArtistName(record PlayRecord) string { 388 + if len(record.Artists) > 0 { 389 + return record.Artists[0].ArtistName 390 + } 391 + return "Unknown Artist" 392 + } 393 + 394 + func ratePerMinute(count int, 
duration time.Duration) float64 { 395 + if duration == 0 { 396 + return 0 397 + } 398 + return float64(count) / duration.Minutes() 399 + } 400 + 401 + type ATProtoClient interface { 402 + ApplyWrites(ctx context.Context, collection string, records []PlayRecord) error 403 + } 404 + 405 + type AuthClient interface { 406 + GetAPIClient() *atclient.APIClient 407 + GetDID() string 408 + } 409 + 410 + type atprotoClientAdapter struct { 411 + client *atclient.APIClient 412 + did string 413 + } 414 + 415 + func (a *atprotoClientAdapter) ApplyWrites(ctx context.Context, collection string, records []PlayRecord) error { 416 + writes, err := prepareWrites(records, collection) 417 + if err != nil { 418 + return err 419 + } 420 + if writes == nil { 421 + return nil 422 + } 423 + 424 + return a.client.Post(ctx, syntax.NSID("com.atproto.repo.applyWrites"), map[string]any{ 425 + "repo": a.did, 426 + "writes": writes, 427 + }, nil) 428 + }
+158
sync/rate.go
··· 1 + package sync 2 + 3 + import ( 4 + "context" 5 + "fmt" 6 + "log/slog" 7 + "sync" 8 + "time" 9 + ) 10 + 11 + const ( 12 + // Limits 13 + WriteLimitDay = 9000 14 + GlobalLimitDay = 35000 15 + 16 + // Costs 17 + ReadGlobalCost = 1 18 + WriteOnlyCost = 1 19 + WriteGlobalCost = 3 20 + 21 + secondsPerDay = 86400 22 + ) 23 + 24 + type RateLimiter interface { 25 + // AllowBulkWrite blocks or returns error until N writes are permissible 26 + AllowBulkWrite(ctx context.Context, n int) error 27 + // AllowRead blocks or returns error until a read is permissible 28 + AllowRead(ctx context.Context) error 29 + // RefundBulkWrite restores N writes to the quota (e.g. after a failed write) 30 + RefundBulkWrite(n int) 31 + // Stats returns current consumption (writes, global) 32 + Stats() (int, int) 33 + } 34 + 35 + type KVStore interface { 36 + Get(key string) (int, error) 37 + Set(key string, val int) error 38 + } 39 + 40 + type quotaLimiter struct { 41 + mu sync.Mutex 42 + kv KVStore 43 + prefix string 44 + } 45 + 46 + func (l *quotaLimiter) Stats() (int, int) { 47 + wKey, gKey := l.getKeys() 48 + w, _ := l.kv.Get(wKey) 49 + g, _ := l.kv.Get(gKey) 50 + return w, g 51 + } 52 + 53 + func NewRateLimiter(kv KVStore) RateLimiter { 54 + return &quotaLimiter{ 55 + kv: kv, 56 + prefix: "quota", 57 + } 58 + } 59 + 60 + func (l *quotaLimiter) getKeys() (string, string) { 61 + day := time.Now().UTC().Format("2006-01-02") 62 + return fmt.Sprintf("%s:writes:%s", l.prefix, day), fmt.Sprintf("%s:global:%s", l.prefix, day) 63 + } 64 + 65 + func (l *quotaLimiter) AllowBulkWrite(ctx context.Context, n int) error { 66 + wKey, gKey := l.getKeys() 67 + wCost := n * WriteOnlyCost 68 + gCost := n * WriteGlobalCost 69 + 70 + return l.wait(ctx, wKey, gKey, wCost, gCost, WriteLimitDay, GlobalLimitDay) 71 + } 72 + 73 + func (l *quotaLimiter) AllowRead(ctx context.Context) error { 74 + _, gKey := l.getKeys() 75 + return l.wait(ctx, "", gKey, 0, ReadGlobalCost, 0, GlobalLimitDay) 76 + } 77 + 78 + func 
(l *quotaLimiter) RefundBulkWrite(n int) { 79 + l.mu.Lock() 80 + defer l.mu.Unlock() 81 + 82 + wKey, gKey := l.getKeys() 83 + wCost := n * WriteOnlyCost 84 + gCost := n * WriteGlobalCost 85 + 86 + if currW, err := l.kv.Get(wKey); err == nil { 87 + l.kv.Set(wKey, max(0, currW-wCost)) 88 + } 89 + if currG, err := l.kv.Get(gKey); err == nil { 90 + l.kv.Set(gKey, max(0, currG-gCost)) 91 + } 92 + } 93 + 94 + func (l *quotaLimiter) wait(ctx context.Context, wKey, gKey string, wCost, gCost int, wLimit, gLimit int) error { 95 + l.mu.Lock() 96 + defer l.mu.Unlock() 97 + 98 + for { 99 + now := time.Now().UTC() 100 + midnight := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.UTC) 101 + elapsed := now.Sub(midnight).Seconds() 102 + 103 + currW := 0 104 + if wKey != "" { 105 + currW, _ = l.kv.Get(wKey) 106 + } 107 + currG, _ := l.kv.Get(gKey) 108 + 109 + var waitW, waitG time.Duration 110 + 111 + // Clamp the 'effective' elapsed time to prevent late-day bursts. 112 + // We allow a maximum credit of 1 minute worth of rate limit (burst window). 
113 + const maxCreditSeconds = 60.0 114 + 115 + if wLimit > 0 { 116 + targetW := float64(currW+wCost) * secondsPerDay / float64(wLimit) 117 + // effectively: we can only be 'ahead' by maxCreditSeconds 118 + effectiveElapsed := max(elapsed, targetW-maxCreditSeconds) 119 + if targetW > effectiveElapsed { 120 + waitW = time.Duration((targetW - effectiveElapsed) * float64(time.Second)) 121 + } 122 + } 123 + 124 + if gLimit > 0 { 125 + targetG := float64(currG+gCost) * secondsPerDay / float64(gLimit) 126 + effectiveElapsed := max(elapsed, targetG-maxCreditSeconds) 127 + if targetG > effectiveElapsed { 128 + waitG = time.Duration((targetG - effectiveElapsed) * float64(time.Second)) 129 + } 130 + } 131 + 132 + maxWait := max(waitG, waitW) 133 + 134 + if maxWait <= 0 { 135 + if wKey != "" { 136 + l.kv.Set(wKey, currW+wCost) 137 + } 138 + l.kv.Set(gKey, currG+gCost) 139 + return nil 140 + } 141 + 142 + if maxWait > 1*time.Minute { 143 + slog.Info("Rate limit reached, sleeping", slog.Duration("wait", maxWait.Round(time.Second))) 144 + } 145 + 146 + l.mu.Unlock() 147 + timer := time.NewTimer(maxWait) 148 + select { 149 + case <-ctx.Done(): 150 + timer.Stop() 151 + l.mu.Lock() 152 + return ctx.Err() 153 + case <-timer.C: 154 + l.mu.Lock() 155 + continue 156 + } 157 + } 158 + }
+126
sync/rate_test.go
··· 1 + package sync 2 + 3 + import ( 4 + "context" 5 + "testing" 6 + "time" 7 + ) 8 + 9 + type mockKV struct { 10 + data map[string]int 11 + } 12 + 13 + func (m *mockKV) Get(key string) (int, error) { 14 + return m.data[key], nil 15 + } 16 + 17 + func (m *mockKV) Set(key string, val int) error { 18 + m.data[key] = val 19 + return nil 20 + } 21 + 22 + func TestRateLimiter_Weighting(t *testing.T) { 23 + kv := &mockKV{data: make(map[string]int)} 24 + limiter := NewRateLimiter(kv) 25 + ctx := context.Background() 26 + 27 + // 1 Read = 1 Global 28 + err := limiter.AllowRead(ctx) 29 + if err != nil { 30 + t.Fatal(err) 31 + } 32 + _, g := limiter.Stats() 33 + if g != 1 { 34 + t.Errorf("expected 1 global unit, got %d", g) 35 + } 36 + 37 + // 1 Write = 1 Write-Only + 3 Global 38 + err = limiter.AllowBulkWrite(ctx, 1) 39 + if err != nil { 40 + t.Fatal(err) 41 + } 42 + w, g := limiter.Stats() 43 + if w != 1 { 44 + t.Errorf("expected 1 write unit, got %d", w) 45 + } 46 + if g != 4 { // 1 from read + 3 from write 47 + t.Errorf("expected 4 global units, got %d", g) 48 + } 49 + 50 + // Bulk Write (10 elements) = 10 Write-Only + 30 Global 51 + err = limiter.AllowBulkWrite(ctx, 10) 52 + if err != nil { 53 + t.Fatal(err) 54 + } 55 + w, g = limiter.Stats() 56 + if w != 11 { 57 + t.Errorf("expected 11 write units, got %d", w) 58 + } 59 + if g != 34 { 60 + t.Errorf("expected 34 global units, got %d", g) 61 + } 62 + } 63 + 64 + func TestRateLimiter_Smoothing(t *testing.T) { 65 + kv := &mockKV{data: make(map[string]int)} 66 + limiter := NewRateLimiter(kv) 67 + 68 + // Set consumption to just below the allowed threshold for "now" 69 + // Let's say 1 hour passed since midnight. 
70 + // Allowed = (9000 / 86400) * 3600 = 375 71 + 72 + now := time.Now().UTC() 73 + midnight := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.UTC) 74 + elapsed := now.Sub(midnight).Seconds() 75 + 76 + allowedW := int((float64(WriteLimitDay) / secondsPerDay) * elapsed) 77 + 78 + wKey, _ := limiter.(*quotaLimiter).getKeys() 79 + _ = kv.Set(wKey, allowedW) 80 + 81 + // Trying to allow 100 more should block. 82 + // We'll use a short timeout context to verify it blocks. 83 + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) 84 + defer cancel() 85 + 86 + err := limiter.AllowBulkWrite(ctx, 100) 87 + if err != context.DeadlineExceeded { 88 + t.Errorf("expected DeadlineExceeded, got %v", err) 89 + } 90 + } 91 + 92 + func TestRateLimiter_Refund(t *testing.T) { 93 + kv := &mockKV{data: make(map[string]int)} 94 + limiter := NewRateLimiter(kv) 95 + ctx := context.Background() 96 + 97 + // Initial state 98 + w, g := limiter.Stats() 99 + if w != 0 || g != 0 { 100 + t.Errorf("expected clean start, got w=%d, g=%d", w, g) 101 + } 102 + 103 + // Consume 104 + err := limiter.AllowBulkWrite(ctx, 10) 105 + if err != nil { 106 + t.Fatal(err) 107 + } 108 + w, g = limiter.Stats() 109 + if w != 10 || g != 30 { 110 + t.Errorf("expected w=10, g=30, got w=%d, g=%d", w, g) 111 + } 112 + 113 + // Refund half 114 + limiter.RefundBulkWrite(5) 115 + w, g = limiter.Stats() 116 + if w != 5 || g != 15 { 117 + t.Errorf("expected w=5, g=15, got w=%d, g=%d", w, g) 118 + } 119 + 120 + // Refund more than exists (should floor at 0) 121 + limiter.RefundBulkWrite(10) 122 + w, g = limiter.Stats() 123 + if w != 0 || g != 0 { 124 + t.Errorf("expected floor at 0, got w=%d, g=%d", w, g) 125 + } 126 + }
+256
sync/record.go
··· 1 + package sync 2 + 3 + import ( 4 + "fmt" 5 + "sort" 6 + "strings" 7 + "time" 8 + "unicode" 9 + 10 + "golang.org/x/text/unicode/norm" 11 + ) 12 + 13 + type Record struct { 14 + Record PlayRecord 15 + IsLastFM bool 16 + Time time.Time 17 + } 18 + 19 + func normalizeString(s string) string { 20 + s = strings.ToLower(s) 21 + s = strings.TrimSpace(s) 22 + 23 + var result strings.Builder 24 + result.Grow(len(s)) 25 + 26 + for _, r := range s { 27 + if unicode.Is(unicode.Mn, r) { 28 + continue 29 + } 30 + decomposed := norm.NFKD.String(string(r)) 31 + if len(decomposed) > 0 { 32 + result.WriteByte(decomposed[0]) 33 + } else { 34 + result.WriteRune(r) 35 + } 36 + } 37 + 38 + s = result.String() 39 + s = strings.Map(func(r rune) rune { 40 + if r >= 128 { 41 + return r 42 + } 43 + if unicode.IsLetter(r) || unicode.IsNumber(r) { 44 + return r 45 + } 46 + return -1 47 + }, s) 48 + 49 + return s 50 + } 51 + 52 + type PlayRecord struct { 53 + Type string `json:"$type"` 54 + TrackName string `json:"trackName"` 55 + Artists []PlayRecordArtist `json:"artists"` 56 + PlayedTime Timestamp `json:"playedTime"` 57 + SubmissionClientAgent string `json:"submissionClientAgent"` 58 + MusicServiceBaseDomain string `json:"musicServiceBaseDomain"` 59 + ReleaseName string `json:"releaseName,omitempty"` 60 + ReleaseMbId string `json:"releaseMbId,omitempty"` 61 + RecordingMbId string `json:"recordingMbId,omitempty"` 62 + OriginUrl string `json:"originUrl"` 63 + MsPlayed int `json:"msPlayed,omitempty"` 64 + } 65 + 66 + type PlayRecordArtist struct { 67 + ArtistName string `json:"artistName"` 68 + ArtistMbId string `json:"artistMbId,omitempty"` 69 + } 70 + 71 + type MergeStats struct { 72 + LastFMTotal int `json:"lastfmTotal"` 73 + SpotifyTotal int `json:"spotifyTotal"` 74 + DuplicatesRemoved int `json:"duplicatesRemoved"` 75 + LastFMUnique int `json:"lastfmUnique"` 76 + SpotifyUnique int `json:"spotifyUnique"` 77 + MergedTotal int `json:"mergedTotal"` 78 + } 79 + 80 + const ( 81 + 
MusicServiceLastFM = "last.fm" 82 + MusicServiceSpotify = "spotify.com" 83 + 84 + TimeBucketSize = 30 * time.Second 85 + MinListenDuration = 30 * time.Second 86 + ) 87 + 88 + var ClientAgent = "lazuli/dev" 89 + 90 + type NormalizedRecord struct { 91 + original PlayRecord 92 + normalizedTrack string 93 + normalizedArtist string 94 + timestamp time.Time 95 + isLastFM bool 96 + } 97 + 98 + func hasMusicBrainzId(record PlayRecord) bool { 99 + for _, artist := range record.Artists { 100 + if artist.ArtistMbId != "" { 101 + return true 102 + } 103 + } 104 + return record.RecordingMbId != "" 105 + } 106 + 107 + func selectBetterRecord(r1, r2 PlayRecord, r1IsLastFM, r2IsLastFM bool) PlayRecord { 108 + r1HasMbId := hasMusicBrainzId(r1) 109 + r2HasMbId := hasMusicBrainzId(r2) 110 + 111 + if r1IsLastFM && r2IsLastFM { 112 + if r1HasMbId && !r2HasMbId { 113 + return r1 114 + } 115 + if r2HasMbId && !r1HasMbId { 116 + return r2 117 + } 118 + return r1 119 + } 120 + 121 + if r1IsLastFM && !r2IsLastFM { 122 + return r1 123 + } 124 + 125 + if !r1IsLastFM && r2IsLastFM { 126 + return r2 127 + } 128 + 129 + return r1 130 + } 131 + 132 + func CreateRecordKey(record PlayRecord) string { 133 + artist := "" 134 + if len(record.Artists) > 0 { 135 + artist = record.Artists[0].ArtistName 136 + } 137 + // Use UTC and RFC3339 (seconds) for robust deduplication across different environments 138 + return fmt.Sprintf("%s|||%s|||%s", 139 + normalizeString(artist), 140 + normalizeString(record.TrackName), 141 + record.PlayedTime.UTC().Format(time.RFC3339), 142 + ) 143 + } 144 + 145 + func MergeRecords(lastfm, spotify []PlayRecord, tolerance time.Duration) ([]PlayRecord, MergeStats) { 146 + stats := MergeStats{ 147 + LastFMTotal: len(lastfm), 148 + SpotifyTotal: len(spotify), 149 + } 150 + 151 + recordsMap := make(map[string]NormalizedRecord, len(lastfm)+len(spotify)) 152 + 153 + processRecords := func(records []PlayRecord, isLastFM bool) { 154 + for _, record := range records { 155 + 
normalizedTrack := normalizeString(record.TrackName) 156 + normalizedArtist := normalizeString(getArtistName(record)) 157 + 158 + found := false 159 + 160 + if tolerance > 0 { 161 + toleranceSeconds := int64(tolerance.Seconds()) 162 + bucketKey := record.PlayedTime.Unix() / toleranceSeconds 163 + 164 + for offset := int64(-1); offset <= 1; offset++ { 165 + mapKey := normalizedTrack + "|" + normalizedArtist + "|" + fmt.Sprintf("%d", bucketKey+offset) 166 + if existing, ok := recordsMap[mapKey]; ok { 167 + if abs(existing.timestamp.Sub(record.PlayedTime.Time)) <= tolerance { 168 + better := selectBetterRecord(existing.original, record, existing.isLastFM, isLastFM) 169 + recordsMap[mapKey] = NormalizedRecord{ 170 + original: better, 171 + normalizedTrack: normalizedTrack, 172 + normalizedArtist: normalizedArtist, 173 + timestamp: existing.timestamp, 174 + isLastFM: existing.isLastFM, 175 + } 176 + stats.DuplicatesRemoved++ 177 + found = true 178 + break 179 + } 180 + } 181 + } 182 + } else { 183 + mapKey := normalizedTrack + "|" + normalizedArtist + "|" + record.PlayedTime.Format(time.RFC3339) 184 + if existing, ok := recordsMap[mapKey]; ok { 185 + better := selectBetterRecord(existing.original, record, existing.isLastFM, isLastFM) 186 + recordsMap[mapKey] = NormalizedRecord{ 187 + original: better, 188 + normalizedTrack: normalizedTrack, 189 + normalizedArtist: normalizedArtist, 190 + timestamp: existing.timestamp, 191 + isLastFM: existing.isLastFM, 192 + } 193 + stats.DuplicatesRemoved++ 194 + found = true 195 + } 196 + } 197 + 198 + if !found { 199 + if tolerance > 0 { 200 + toleranceSeconds := int64(tolerance.Seconds()) 201 + bucketKey := record.PlayedTime.Unix() / toleranceSeconds 202 + mapKey := normalizedTrack + "|" + normalizedArtist + "|" + fmt.Sprintf("%d", bucketKey) 203 + recordsMap[mapKey] = NormalizedRecord{ 204 + original: record, 205 + normalizedTrack: normalizedTrack, 206 + normalizedArtist: normalizedArtist, 207 + timestamp: record.PlayedTime.Time, 
208 + isLastFM: isLastFM, 209 + } 210 + } else { 211 + mapKey := normalizedTrack + "|" + normalizedArtist + "|" + record.PlayedTime.Format(time.RFC3339) 212 + recordsMap[mapKey] = NormalizedRecord{ 213 + original: record, 214 + normalizedTrack: normalizedTrack, 215 + normalizedArtist: normalizedArtist, 216 + timestamp: record.PlayedTime.Time, 217 + isLastFM: isLastFM, 218 + } 219 + } 220 + if isLastFM { 221 + stats.LastFMUnique++ 222 + } else { 223 + stats.SpotifyUnique++ 224 + } 225 + } 226 + } 227 + } 228 + 229 + processRecords(lastfm, true) 230 + processRecords(spotify, false) 231 + 232 + uniqueRecords := make([]NormalizedRecord, 0, len(recordsMap)) 233 + for _, nr := range recordsMap { 234 + uniqueRecords = append(uniqueRecords, nr) 235 + } 236 + 237 + stats.MergedTotal = len(uniqueRecords) 238 + 239 + sort.Slice(uniqueRecords, func(i, j int) bool { 240 + if !uniqueRecords[i].timestamp.Equal(uniqueRecords[j].timestamp) { 241 + return uniqueRecords[i].timestamp.Before(uniqueRecords[j].timestamp) 242 + } 243 + return uniqueRecords[i].normalizedTrack < uniqueRecords[j].normalizedTrack 244 + }) 245 + 246 + result := make([]PlayRecord, len(uniqueRecords)) 247 + for i, nr := range uniqueRecords { 248 + result[i] = nr.original 249 + } 250 + 251 + return result, stats 252 + } 253 + 254 + func abs[T ~int64](n T) T { 255 + return max(n, -n) 256 + }
+885
sync/record_test.go
··· 1 + package sync 2 + 3 + import ( 4 + "testing" 5 + "time" 6 + ) 7 + 8 + func TestCreateRecordKey(t *testing.T) { 9 + tests := []struct { 10 + name string 11 + record PlayRecord 12 + expected string 13 + }{ 14 + { 15 + name: "basic lowercase", 16 + record: PlayRecord{ 17 + TrackName: "Test Track", 18 + Artists: []PlayRecordArtist{{ArtistName: "Test Artist"}}, 19 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 20 + }, 21 + expected: "testartist|||testtrack|||2024-01-15T10:30:00Z", 22 + }, 23 + { 24 + name: "uppercase converted to lowercase", 25 + record: PlayRecord{ 26 + TrackName: "THERE IS A LIGHT THAT NEVER GOES OUT", 27 + Artists: []PlayRecordArtist{{ArtistName: "THE SMITHS"}}, 28 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 29 + }, 30 + expected: "thesmiths|||thereisalightthatnevergoesout|||2024-01-15T10:30:00Z", 31 + }, 32 + { 33 + name: "punctuation removed", 34 + record: PlayRecord{ 35 + TrackName: "Don't Stop Me Now!", 36 + Artists: []PlayRecordArtist{{ArtistName: "Queen"}}, 37 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 38 + }, 39 + expected: "queen|||dontstopmenow|||2024-01-15T10:30:00Z", 40 + }, 41 + { 42 + name: "special characters removed", 43 + record: PlayRecord{ 44 + TrackName: "Rock & Roll (Part 2)", 45 + Artists: []PlayRecordArtist{{ArtistName: "Gary Glitter"}}, 46 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 47 + }, 48 + expected: "garyglitter|||rockrollpart2|||2024-01-15T10:30:00Z", 49 + }, 50 + { 51 + name: "empty artist defaults to empty string", 52 + record: PlayRecord{ 53 + TrackName: "Unknown Track", 54 + Artists: []PlayRecordArtist{}, 55 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 56 + }, 57 + expected: "|||unknowntrack|||2024-01-15T10:30:00Z", 58 + }, 59 + { 60 + name: "multiple artists only uses first", 61 + record: PlayRecord{ 62 + TrackName: "Song", 63 + Artists: 
[]PlayRecordArtist{{ArtistName: "First Artist"}, {ArtistName: "Second Artist"}}, 64 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 65 + }, 66 + expected: "firstartist|||song|||2024-01-15T10:30:00Z", 67 + }, 68 + { 69 + name: "whitespace normalized", 70 + record: PlayRecord{ 71 + TrackName: "Song With Lots Of Spaces", 72 + Artists: []PlayRecordArtist{{ArtistName: " Artist With Spaces "}}, 73 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 74 + }, 75 + expected: "artistwithspaces|||songwithlotsofspaces|||2024-01-15T10:30:00Z", 76 + }, 77 + { 78 + name: "diacritics normalized", 79 + record: PlayRecord{ 80 + TrackName: "Café", 81 + Artists: []PlayRecordArtist{{ArtistName: "Naïve"}}, 82 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 83 + }, 84 + expected: "naive|||cafe|||2024-01-15T10:30:00Z", 85 + }, 86 + } 87 + 88 + for _, tt := range tests { 89 + t.Run(tt.name, func(t *testing.T) { 90 + result := CreateRecordKey(tt.record) 91 + if result != tt.expected { 92 + t.Errorf("CreateRecordKey() = %q, want %q", result, tt.expected) 93 + } 94 + }) 95 + } 96 + } 97 + 98 + func TestSelectBetterRecord(t *testing.T) { 99 + tests := []struct { 100 + name string 101 + r1 PlayRecord 102 + r2 PlayRecord 103 + r1IsLastFM bool 104 + r2IsLastFM bool 105 + expectedService string 106 + }{ 107 + { 108 + name: "lastfm wins over spotify", 109 + r1: PlayRecord{ 110 + TrackName: "Test", 111 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 112 + MusicServiceBaseDomain: MusicServiceLastFM, 113 + }, 114 + r2: PlayRecord{ 115 + TrackName: "Test", 116 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 117 + MusicServiceBaseDomain: MusicServiceSpotify, 118 + }, 119 + r1IsLastFM: true, 120 + r2IsLastFM: false, 121 + expectedService: MusicServiceLastFM, 122 + }, 123 + { 124 + name: "spotify loses to lastfm even with mbid", 125 + r1: PlayRecord{ 126 + TrackName: "Test", 127 + Artists: 
[]PlayRecordArtist{{ArtistName: "Artist"}}, 128 + MusicServiceBaseDomain: MusicServiceLastFM, 129 + }, 130 + r2: PlayRecord{ 131 + TrackName: "Test", 132 + Artists: []PlayRecordArtist{{ArtistName: "Artist", ArtistMbId: "mbid-spotify"}}, 133 + MusicServiceBaseDomain: MusicServiceSpotify, 134 + }, 135 + r1IsLastFM: true, 136 + r2IsLastFM: false, 137 + expectedService: MusicServiceLastFM, 138 + }, 139 + { 140 + name: "lastfm with mbid wins over spotify without mbid", 141 + r1: PlayRecord{ 142 + TrackName: "Test", 143 + Artists: []PlayRecordArtist{{ArtistName: "Artist", ArtistMbId: "mbid-123"}}, 144 + MusicServiceBaseDomain: MusicServiceLastFM, 145 + }, 146 + r2: PlayRecord{ 147 + TrackName: "Test", 148 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 149 + MusicServiceBaseDomain: MusicServiceSpotify, 150 + }, 151 + r1IsLastFM: true, 152 + r2IsLastFM: false, 153 + expectedService: MusicServiceLastFM, 154 + }, 155 + { 156 + name: "spotify without mbid loses to lastfm with mbid", 157 + r1: PlayRecord{ 158 + TrackName: "Test", 159 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 160 + MusicServiceBaseDomain: MusicServiceLastFM, 161 + }, 162 + r2: PlayRecord{ 163 + TrackName: "Test", 164 + Artists: []PlayRecordArtist{{ArtistName: "Artist", ArtistMbId: "mbid-123"}}, 165 + MusicServiceBaseDomain: MusicServiceSpotify, 166 + }, 167 + r1IsLastFM: true, 168 + r2IsLastFM: false, 169 + expectedService: MusicServiceLastFM, 170 + }, 171 + { 172 + name: "recording mbid takes precedence", 173 + r1: PlayRecord{ 174 + TrackName: "Test", 175 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 176 + RecordingMbId: "mbid-recording", 177 + MusicServiceBaseDomain: MusicServiceLastFM, 178 + }, 179 + r2: PlayRecord{ 180 + TrackName: "Test", 181 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 182 + MusicServiceBaseDomain: MusicServiceLastFM, 183 + }, 184 + r1IsLastFM: true, 185 + r2IsLastFM: true, 186 + expectedService: MusicServiceLastFM, 187 + }, 188 + { 189 + name: 
"both spotify same source", 190 + r1: PlayRecord{ 191 + TrackName: "Test", 192 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 193 + MusicServiceBaseDomain: MusicServiceSpotify, 194 + }, 195 + r2: PlayRecord{ 196 + TrackName: "Test", 197 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 198 + MusicServiceBaseDomain: MusicServiceSpotify, 199 + }, 200 + r1IsLastFM: false, 201 + r2IsLastFM: false, 202 + expectedService: MusicServiceSpotify, 203 + }, 204 + } 205 + 206 + for _, tt := range tests { 207 + t.Run(tt.name, func(t *testing.T) { 208 + result := selectBetterRecord(tt.r1, tt.r2, tt.r1IsLastFM, tt.r2IsLastFM) 209 + if result.MusicServiceBaseDomain != tt.expectedService { 210 + t.Errorf("selectBetterRecord() = %q, want %q", result.MusicServiceBaseDomain, tt.expectedService) 211 + } 212 + }) 213 + } 214 + } 215 + 216 + func TestMergeRecords(t *testing.T) { 217 + baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 218 + 219 + tests := []struct { 220 + name string 221 + lastfm []PlayRecord 222 + spotify []PlayRecord 223 + tolerance time.Duration 224 + expectedLen int 225 + expectedDuplicates int 226 + expectedLastFMUnique int 227 + expectedSpotifyUnique int 228 + }{ 229 + { 230 + name: "empty input both", 231 + lastfm: []PlayRecord{}, 232 + spotify: []PlayRecord{}, 233 + tolerance: DefaultCrossSourceTolerance, 234 + expectedLen: 0, 235 + }, 236 + { 237 + name: "empty input lastfm only", 238 + lastfm: []PlayRecord{}, 239 + spotify: []PlayRecord{ 240 + { 241 + TrackName: "Song", 242 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 243 + PlayedTime: Timestamp{Time: baseTime}, 244 + MusicServiceBaseDomain: MusicServiceSpotify, 245 + }, 246 + }, 247 + tolerance: DefaultCrossSourceTolerance, 248 + expectedLen: 1, 249 + expectedLastFMUnique: 0, 250 + expectedSpotifyUnique: 1, 251 + }, 252 + { 253 + name: "empty input spotify only", 254 + lastfm: []PlayRecord{ 255 + { 256 + TrackName: "Song", 257 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 
258 + PlayedTime: Timestamp{Time: baseTime}, 259 + MusicServiceBaseDomain: MusicServiceLastFM, 260 + }, 261 + }, 262 + spotify: []PlayRecord{}, 263 + tolerance: DefaultCrossSourceTolerance, 264 + expectedLen: 1, 265 + expectedLastFMUnique: 1, 266 + expectedSpotifyUnique: 0, 267 + }, 268 + { 269 + name: "same timestamp merged", 270 + lastfm: []PlayRecord{ 271 + { 272 + TrackName: "Song", 273 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 274 + PlayedTime: Timestamp{Time: baseTime}, 275 + MusicServiceBaseDomain: MusicServiceLastFM, 276 + }, 277 + }, 278 + spotify: []PlayRecord{ 279 + { 280 + TrackName: "Song", 281 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 282 + PlayedTime: Timestamp{Time: baseTime}, 283 + MusicServiceBaseDomain: MusicServiceSpotify, 284 + }, 285 + }, 286 + tolerance: DefaultCrossSourceTolerance, 287 + expectedLen: 1, 288 + expectedDuplicates: 1, 289 + expectedLastFMUnique: 1, 290 + expectedSpotifyUnique: 0, 291 + }, 292 + { 293 + name: "different songs not merged", 294 + lastfm: []PlayRecord{ 295 + { 296 + TrackName: "Song A", 297 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 298 + PlayedTime: Timestamp{Time: baseTime}, 299 + MusicServiceBaseDomain: MusicServiceLastFM, 300 + }, 301 + }, 302 + spotify: []PlayRecord{ 303 + { 304 + TrackName: "Song B", 305 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 306 + PlayedTime: Timestamp{Time: baseTime}, 307 + MusicServiceBaseDomain: MusicServiceSpotify, 308 + }, 309 + }, 310 + tolerance: DefaultCrossSourceTolerance, 311 + expectedLen: 2, 312 + expectedDuplicates: 0, 313 + }, 314 + { 315 + name: "different artists not merged", 316 + lastfm: []PlayRecord{ 317 + { 318 + TrackName: "Song", 319 + Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, 320 + PlayedTime: Timestamp{Time: baseTime}, 321 + MusicServiceBaseDomain: MusicServiceLastFM, 322 + }, 323 + }, 324 + spotify: []PlayRecord{ 325 + { 326 + TrackName: "Song", 327 + Artists: []PlayRecordArtist{{ArtistName: "Artist 
B"}}, 328 + PlayedTime: Timestamp{Time: baseTime}, 329 + MusicServiceBaseDomain: MusicServiceSpotify, 330 + }, 331 + }, 332 + tolerance: DefaultCrossSourceTolerance, 333 + expectedLen: 2, 334 + expectedDuplicates: 0, 335 + }, 336 + { 337 + name: "zero tolerance same timestamp merged", 338 + lastfm: []PlayRecord{ 339 + { 340 + TrackName: "Song", 341 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 342 + PlayedTime: Timestamp{Time: baseTime}, 343 + MusicServiceBaseDomain: MusicServiceLastFM, 344 + }, 345 + }, 346 + spotify: []PlayRecord{ 347 + { 348 + TrackName: "Song", 349 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 350 + PlayedTime: Timestamp{Time: baseTime}, 351 + MusicServiceBaseDomain: MusicServiceSpotify, 352 + }, 353 + }, 354 + tolerance: 0, 355 + expectedLen: 1, 356 + expectedDuplicates: 1, 357 + }, 358 + { 359 + name: "partial overlap merged", 360 + lastfm: []PlayRecord{ 361 + { 362 + TrackName: "Song A", 363 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 364 + PlayedTime: Timestamp{Time: baseTime}, 365 + MusicServiceBaseDomain: MusicServiceLastFM, 366 + }, 367 + { 368 + TrackName: "Song B", 369 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 370 + PlayedTime: Timestamp{Time: baseTime.Add(1 * time.Hour)}, 371 + MusicServiceBaseDomain: MusicServiceLastFM, 372 + }, 373 + }, 374 + spotify: []PlayRecord{ 375 + { 376 + TrackName: "Song A", 377 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 378 + PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, 379 + MusicServiceBaseDomain: MusicServiceSpotify, 380 + }, 381 + { 382 + TrackName: "Song C", 383 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 384 + PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Hour)}, 385 + MusicServiceBaseDomain: MusicServiceSpotify, 386 + }, 387 + }, 388 + tolerance: DefaultCrossSourceTolerance, 389 + expectedLen: 3, 390 + expectedDuplicates: 1, 391 + }, 392 + { 393 + name: "case insensitive matching", 394 + lastfm: []PlayRecord{ 395 + { 
396 + TrackName: "SONG", 397 + Artists: []PlayRecordArtist{{ArtistName: "ARTIST"}}, 398 + PlayedTime: Timestamp{Time: baseTime}, 399 + MusicServiceBaseDomain: MusicServiceLastFM, 400 + }, 401 + }, 402 + spotify: []PlayRecord{ 403 + { 404 + TrackName: "song", 405 + Artists: []PlayRecordArtist{{ArtistName: "artist"}}, 406 + PlayedTime: Timestamp{Time: baseTime}, 407 + MusicServiceBaseDomain: MusicServiceSpotify, 408 + }, 409 + }, 410 + tolerance: DefaultCrossSourceTolerance, 411 + expectedLen: 1, 412 + expectedDuplicates: 1, 413 + }, 414 + { 415 + name: "punctuation insensitive matching", 416 + lastfm: []PlayRecord{ 417 + { 418 + TrackName: "Don't Stop!", 419 + Artists: []PlayRecordArtist{{ArtistName: "Queen"}}, 420 + PlayedTime: Timestamp{Time: baseTime}, 421 + MusicServiceBaseDomain: MusicServiceLastFM, 422 + }, 423 + }, 424 + spotify: []PlayRecord{ 425 + { 426 + TrackName: "Dont Stop", 427 + Artists: []PlayRecordArtist{{ArtistName: "Queen"}}, 428 + PlayedTime: Timestamp{Time: baseTime}, 429 + MusicServiceBaseDomain: MusicServiceSpotify, 430 + }, 431 + }, 432 + tolerance: DefaultCrossSourceTolerance, 433 + expectedLen: 1, 434 + expectedDuplicates: 1, 435 + }, 436 + { 437 + name: "many records no duplicates", 438 + lastfm: []PlayRecord{ 439 + {TrackName: "Song 1", Artists: []PlayRecordArtist{{ArtistName: "Artist 1"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 440 + {TrackName: "Song 2", Artists: []PlayRecordArtist{{ArtistName: "Artist 2"}}, PlayedTime: Timestamp{Time: baseTime.Add(1 * time.Minute)}, MusicServiceBaseDomain: MusicServiceLastFM}, 441 + {TrackName: "Song 3", Artists: []PlayRecordArtist{{ArtistName: "Artist 3"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Minute)}, MusicServiceBaseDomain: MusicServiceLastFM}, 442 + }, 443 + spotify: []PlayRecord{ 444 + {TrackName: "Song 4", Artists: []PlayRecordArtist{{ArtistName: "Artist 4"}}, PlayedTime: Timestamp{Time: baseTime.Add(3 * time.Minute)}, 
MusicServiceBaseDomain: MusicServiceSpotify}, 445 + {TrackName: "Song 5", Artists: []PlayRecordArtist{{ArtistName: "Artist 5"}}, PlayedTime: Timestamp{Time: baseTime.Add(4 * time.Minute)}, MusicServiceBaseDomain: MusicServiceSpotify}, 446 + {TrackName: "Song 6", Artists: []PlayRecordArtist{{ArtistName: "Artist 6"}}, PlayedTime: Timestamp{Time: baseTime.Add(5 * time.Minute)}, MusicServiceBaseDomain: MusicServiceSpotify}, 447 + }, 448 + tolerance: DefaultCrossSourceTolerance, 449 + expectedLen: 6, 450 + expectedDuplicates: 0, 451 + expectedLastFMUnique: 3, 452 + expectedSpotifyUnique: 3, 453 + }, 454 + { 455 + name: "zero tolerance 1 second apart not merged", 456 + lastfm: []PlayRecord{ 457 + { 458 + TrackName: "Song", 459 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 460 + PlayedTime: Timestamp{Time: baseTime}, 461 + MusicServiceBaseDomain: MusicServiceLastFM, 462 + }, 463 + }, 464 + spotify: []PlayRecord{ 465 + { 466 + TrackName: "Song", 467 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 468 + PlayedTime: Timestamp{Time: baseTime.Add(1 * time.Second)}, 469 + MusicServiceBaseDomain: MusicServiceSpotify, 470 + }, 471 + }, 472 + tolerance: 0, 473 + expectedLen: 2, 474 + expectedDuplicates: 0, 475 + }, 476 + { 477 + name: "five minute tolerance 31 seconds apart merged", 478 + lastfm: []PlayRecord{ 479 + { 480 + TrackName: "Song", 481 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 482 + PlayedTime: Timestamp{Time: baseTime}, 483 + MusicServiceBaseDomain: MusicServiceLastFM, 484 + }, 485 + }, 486 + spotify: []PlayRecord{ 487 + { 488 + TrackName: "Song", 489 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 490 + PlayedTime: Timestamp{Time: baseTime.Add(31 * time.Second)}, 491 + MusicServiceBaseDomain: MusicServiceSpotify, 492 + }, 493 + }, 494 + tolerance: DefaultCrossSourceTolerance, 495 + expectedLen: 1, 496 + expectedDuplicates: 1, 497 + }, 498 + { 499 + name: "one minute tolerance 30 seconds apart merged", 500 + lastfm: []PlayRecord{ 501 
+ { 502 + TrackName: "Song", 503 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 504 + PlayedTime: Timestamp{Time: baseTime}, 505 + MusicServiceBaseDomain: MusicServiceLastFM, 506 + }, 507 + }, 508 + spotify: []PlayRecord{ 509 + { 510 + TrackName: "Song", 511 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 512 + PlayedTime: Timestamp{Time: baseTime.Add(30 * time.Second)}, 513 + MusicServiceBaseDomain: MusicServiceSpotify, 514 + }, 515 + }, 516 + tolerance: time.Minute, 517 + expectedLen: 1, 518 + expectedDuplicates: 1, 519 + }, 520 + { 521 + name: "30 second tolerance 31 seconds apart not merged", 522 + lastfm: []PlayRecord{ 523 + { 524 + TrackName: "Song", 525 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 526 + PlayedTime: Timestamp{Time: baseTime}, 527 + MusicServiceBaseDomain: MusicServiceLastFM, 528 + }, 529 + }, 530 + spotify: []PlayRecord{ 531 + { 532 + TrackName: "Song", 533 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 534 + PlayedTime: Timestamp{Time: baseTime.Add(31 * time.Second)}, 535 + MusicServiceBaseDomain: MusicServiceSpotify, 536 + }, 537 + }, 538 + tolerance: 30 * time.Second, 539 + expectedLen: 2, 540 + expectedDuplicates: 0, 541 + }, 542 + { 543 + name: "many consecutive deduplications", 544 + lastfm: []PlayRecord{ 545 + {TrackName: "Written In Stone - KAYTRANADA Remix", Artists: []PlayRecordArtist{{ArtistName: "Robert Glasper"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 0, 55, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 546 + {TrackName: "Gum In My Mouth", Artists: []PlayRecordArtist{{ArtistName: "Butcher Brown", ArtistMbId: "c0937ba4-6869-456b-afd0-10335ae50245"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 3, 58, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "ab00c566-3038-4e5f-a5e4-264a9baf542c", RecordingMbId: "5d913a35-fbee-403d-8771-4a7e11013889"}, 547 + {TrackName: "Welcome to the World of the Plastic Beach", Artists: 
[]PlayRecordArtist{{ArtistName: "Gorillaz", ArtistMbId: "e21857d5-3256-4547-afb3-4b6ded592596"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 6, 39, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "28ddf022-0a8a-4ecd-bf18-d80af26c3aff", RecordingMbId: "4d3de31d-d25f-3abf-9bd1-8e38b62dd37e"}, 548 + {TrackName: "Already There", Artists: []PlayRecordArtist{{ArtistName: "Taylor McFerrin", ArtistMbId: "7abc2c7b-f47f-4d94-b75f-8cb4ca926899"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 10, 14, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "2113fbb8-c2b3-4aaf-9b33-92845940f82d"}, 549 + {TrackName: "Here We Go Again", Artists: []PlayRecordArtist{{ArtistName: "Buckshot LeFonque", ArtistMbId: "c1085917-1048-4f49-91d8-f7f7625e3545"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 13, 15, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "02437aee-0d9a-3134-806e-c27d799203d2", RecordingMbId: "0d5ee57b-21cb-329d-872c-43569e13c151"}, 550 + {TrackName: "Life's Work", Artists: []PlayRecordArtist{{ArtistName: "LooPRaT"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 15, 41, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "6221803a-4228-42e2-9191-30723b1faa0e", RecordingMbId: "63d324cd-a34b-455b-a8d6-1572766854b9"}, 551 + {TrackName: "Chaser", Artists: []PlayRecordArtist{{ArtistName: "Electric Wire Hustle", ArtistMbId: "77fc277e-f79d-40b1-b5c8-92702c86b760"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 19, 13, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "1e42946f-5a6b-3177-9fab-73a8b3377486", RecordingMbId: "3d356387-45fc-4ffa-8a34-76aae01f6de7"}, 552 + {TrackName: "Burn & Rise", Artists: []PlayRecordArtist{{ArtistName: "Yazmin Lacey", ArtistMbId: "451919df-764c-40cf-9aa2-fcafe599d869"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 23, 58, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, 
ReleaseMbId: "bdba3763-8f61-4598-8a20-e78424dc744b", RecordingMbId: "4d7b93fc-3651-4396-88b9-7bc98eb35e09"}, 553 + {TrackName: "I Want You", Artists: []PlayRecordArtist{{ArtistName: "Robert Glasper", ArtistMbId: "6e8f82ea-9e6d-4fdd-9b32-32feef13186b"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 26, 38, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "092c640b-c864-4e4b-abab-e44f1c0fe681", RecordingMbId: "369d7f97-1a10-4b8f-b867-05f5b64b5edf"}, 554 + {TrackName: "Go On", Artists: []PlayRecordArtist{{ArtistName: "Snoop Dogg", ArtistMbId: "f90e8b26-9e52-4669-a5c9-e28529c47894"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 21, 8, 2, 43, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "e228272d-9b8c-4993-b2ee-ae9a0dbfe816"}, 555 + }, 556 + spotify: []PlayRecord{ 557 + {TrackName: "Lil Girl feat. Fatima", Artists: []PlayRecordArtist{{ArtistName: "Shafiq Husayn"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 0, 55, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 230200}, 558 + {TrackName: "Written In Stone - KAYTRANADA Remix", Artists: []PlayRecordArtist{{ArtistName: "Robert Glasper"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 3, 58, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 182452}, 559 + {TrackName: "Gum In My Mouth", Artists: []PlayRecordArtist{{ArtistName: "Butcher Brown"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 6, 39, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 159853}, 560 + {TrackName: "Welcome to the World of the Plastic Beach (feat. 
Snoop Dogg and Hypnotic Brass Ensemble)", Artists: []PlayRecordArtist{{ArtistName: "Gorillaz"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 10, 14, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 215506}, 561 + {TrackName: "Already There", Artists: []PlayRecordArtist{{ArtistName: "Taylor McFerrin"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 13, 15, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 179989}, 562 + {TrackName: "Here We Go Again", Artists: []PlayRecordArtist{{ArtistName: "Buckshot LeFonque"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 15, 41, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 146600}, 563 + {TrackName: "Life's Work", Artists: []PlayRecordArtist{{ArtistName: "LOOPRAT"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 19, 13, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 210280}, 564 + {TrackName: "Chaser", Artists: []PlayRecordArtist{{ArtistName: "Electric Wire Hustle"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 23, 58, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 285493}, 565 + {TrackName: "Burn & Rise", Artists: []PlayRecordArtist{{ArtistName: "Yazmin Lacey"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 26, 38, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 158685}, 566 + {TrackName: "I Want You", Artists: []PlayRecordArtist{{ArtistName: "Robert Glasper"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 20, 18, 29, 2, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 141024}, 567 + }, 568 + tolerance: DefaultCrossSourceTolerance, 569 + expectedLen: 12, 570 + expectedDuplicates: 8, 571 + }, 572 + { 573 + name: "mixed sources and edge cases", 574 + lastfm: []PlayRecord{ 575 + {TrackName: "Roi du nord (Freestyle)", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi", ArtistMbId: 
"9b205338-4565-4b14-8e4b-94d1abfedfbc"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 37, 30, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "abc"}, 576 + {TrackName: "Rap conscient", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi", ArtistMbId: "9b205338-4565-4b14-8e4b-94d1abfedfbc"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 39, 39, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 577 + {TrackName: "Saint jack", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi", ArtistMbId: "9b205338-4565-4b14-8e4b-94d1abfedfbc"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 41, 57, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 578 + {TrackName: "(I'm a Kadaver) Alakazam", Artists: []PlayRecordArtist{{ArtistName: "Psychedelic Porn Crumpets", ArtistMbId: "11d94660-1963-4020-8762-4c5907e2ea48"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 3, 57, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, RecordingMbId: "xyz"}, 579 + {TrackName: "Teddy Picker", Artists: []PlayRecordArtist{{ArtistName: "Arctic Monkeys", ArtistMbId: "ada7a83c-e3e1-40f1-93f9-3e73dbc9298a"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 7, 43, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM, ReleaseMbId: "def"}, 580 + {TrackName: "New Gold (feat. 
Tame Impala and Bootie Brown)", Artists: []PlayRecordArtist{{ArtistName: "Gorillaz", ArtistMbId: "e21857d5-3256-4547-afb3-4b6ded592596"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 10, 24, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 581 + {TrackName: "The Pretender", Artists: []PlayRecordArtist{{ArtistName: "Foo Fighters", ArtistMbId: "67f66c07-6e61-4026-ade5-7e782fad3a5d"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 13, 57, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 582 + {TrackName: "Make It Wit Chu", Artists: []PlayRecordArtist{{ArtistName: "Queens of the Stone Age", ArtistMbId: "7dc8f5bd-9d0b-4087-9f73-dc164950bbd8"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 18, 23, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 583 + {TrackName: "Broken Boy", Artists: []PlayRecordArtist{{ArtistName: "Cage the Elephant", ArtistMbId: "b41b38d4-ef3e-4f37-8c75-cfe9af999696"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 23, 11, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 584 + {TrackName: "Take Me Out", Artists: []PlayRecordArtist{{ArtistName: "Franz Ferdinand", ArtistMbId: "aa7a2827-f74b-473c-bd79-03d065835cf7"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 25, 52, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 585 + }, 586 + spotify: []PlayRecord{ 587 + {TrackName: "Roi du nord (Freestyle)", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 39, 37, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 180000}, 588 + {TrackName: "Rap conscient", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 41, 59, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 200000}, 589 + {TrackName: "Saint jack", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 
10, 43, 30, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 190000}, 590 + {TrackName: "Ford (Freestyle)", Artists: []PlayRecordArtist{{ArtistName: "Jack Uzi"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 12, 10, 44, 32, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 170000}, 591 + {TrackName: "(I'm a Kadaver) Alakazam", Artists: []PlayRecordArtist{{ArtistName: "Psychedelic Porn Crumpets"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 7, 44, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 228954}, 592 + {TrackName: "Teddy Picker", Artists: []PlayRecordArtist{{ArtistName: "Arctic Monkeys"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 10, 27, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 165000}, 593 + {TrackName: "New Gold (feat. Tame Impala and Bootie Brown)", Artists: []PlayRecordArtist{{ArtistName: "Gorillaz"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 13, 58, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 215149}, 594 + {TrackName: "The Pretender", Artists: []PlayRecordArtist{{ArtistName: "Foo Fighters"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 18, 25, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 269373}, 595 + {TrackName: "Make It Wit Chu", Artists: []PlayRecordArtist{{ArtistName: "Queens of the Stone Age"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 23, 13, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 290493}, 596 + {TrackName: "Broken Boy", Artists: []PlayRecordArtist{{ArtistName: "Cage The Elephant"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 14, 7, 25, 52, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify, MsPlayed: 163200}, 597 + }, 598 + tolerance: DefaultCrossSourceTolerance, 599 + expectedLen: 11, 600 + expectedDuplicates: 9, 601 + }, 602 + { 603 + name: "same song different days not merged", 604 + lastfm: 
[]PlayRecord{ 605 + {TrackName: "After School", Artists: []PlayRecordArtist{{ArtistName: "Weeekly"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 18, 17, 59, 3, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 606 + {TrackName: "Blue Flame", Artists: []PlayRecordArtist{{ArtistName: "LE SSERAFIM"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 18, 17, 59, 4, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 607 + {TrackName: "After LIKE", Artists: []PlayRecordArtist{{ArtistName: "IVE"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 18, 18, 2, 25, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 608 + }, 609 + spotify: []PlayRecord{ 610 + {TrackName: "After School", Artists: []PlayRecordArtist{{ArtistName: "Weeekly"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 18, 18, 2, 26, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify}, 611 + {TrackName: "After LIKE", Artists: []PlayRecordArtist{{ArtistName: "IVE"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 18, 18, 5, 21, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify}, 612 + }, 613 + tolerance: DefaultCrossSourceTolerance, 614 + expectedLen: 3, 615 + expectedDuplicates: 2, 616 + expectedLastFMUnique: 3, 617 + expectedSpotifyUnique: 0, 618 + }, 619 + { 620 + name: "hyphen vs space dedupe with space-less normalization", 621 + lastfm: []PlayRecord{ 622 + {TrackName: "So This is Love?", Artists: []PlayRecordArtist{{ArtistName: "George Benson"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 14, 32, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 623 + {TrackName: "Cream Puff War", Artists: []PlayRecordArtist{{ArtistName: "Grateful Dead"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 18, 12, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 624 + {TrackName: "V Neck Sweater", Artists: []PlayRecordArtist{{ArtistName: "The Greyboy Allstars"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 20, 43, 0, time.UTC)}, 
MusicServiceBaseDomain: MusicServiceLastFM}, 625 + {TrackName: "Jungle Boogie-in", Artists: []PlayRecordArtist{{ArtistName: "Ghost-Note"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 22, 53, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 626 + }, 627 + spotify: []PlayRecord{ 628 + {TrackName: "V-Neck Sweater", Artists: []PlayRecordArtist{{ArtistName: "The Greyboy Allstars"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 20, 43, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify}, 629 + {TrackName: "Jungle Boogie-in", Artists: []PlayRecordArtist{{ArtistName: "Ghost-Note"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 10, 22, 53, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify}, 630 + }, 631 + tolerance: DefaultCrossSourceTolerance, 632 + expectedLen: 4, 633 + expectedDuplicates: 2, 634 + expectedLastFMUnique: 4, 635 + expectedSpotifyUnique: 0, 636 + }, 637 + { 638 + name: "korean artist name and diacritics", 639 + lastfm: []PlayRecord{ 640 + {TrackName: "DAAAAAMMMN", Artists: []PlayRecordArtist{{ArtistName: "김재중"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 16, 51, 1, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 641 + {TrackName: "Déjà fait", Artists: []PlayRecordArtist{{ArtistName: "Peet"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 17, 0, 10, 43, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceLastFM}, 642 + }, 643 + spotify: []PlayRecord{ 644 + {TrackName: "DAAAAAMMMN", Artists: []PlayRecordArtist{{ArtistName: "김재중"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 16, 16, 51, 1, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify}, 645 + {TrackName: "Déjà fait", Artists: []PlayRecordArtist{{ArtistName: "Peet"}}, PlayedTime: Timestamp{Time: time.Date(2023, 9, 17, 0, 7, 21, 0, time.UTC)}, MusicServiceBaseDomain: MusicServiceSpotify}, 646 + }, 647 + tolerance: DefaultCrossSourceTolerance, 648 + expectedLen: 2, 649 + expectedDuplicates: 2, 650 + expectedLastFMUnique: 
2, 651 + expectedSpotifyUnique: 0, 652 + }, 653 + } 654 + 655 + for _, tt := range tests { 656 + t.Run(tt.name, func(t *testing.T) { 657 + merged, stats := MergeRecords(tt.lastfm, tt.spotify, tt.tolerance) 658 + 659 + if len(merged) != tt.expectedLen { 660 + t.Errorf("len(merged) = %d, want %d", len(merged), tt.expectedLen) 661 + } 662 + if stats.DuplicatesRemoved != tt.expectedDuplicates { 663 + t.Errorf("stats.DuplicatesRemoved = %d, want %d", stats.DuplicatesRemoved, tt.expectedDuplicates) 664 + } 665 + if tt.expectedLastFMUnique > 0 && stats.LastFMUnique != tt.expectedLastFMUnique { 666 + t.Errorf("stats.LastFMUnique = %d, want %d", stats.LastFMUnique, tt.expectedLastFMUnique) 667 + } 668 + if tt.expectedSpotifyUnique > 0 && stats.SpotifyUnique != tt.expectedSpotifyUnique { 669 + t.Errorf("stats.SpotifyUnique = %d, want %d", stats.SpotifyUnique, tt.expectedSpotifyUnique) 670 + } 671 + if stats.LastFMTotal != len(tt.lastfm) { 672 + t.Errorf("stats.LastFMTotal = %d, want %d", stats.LastFMTotal, len(tt.lastfm)) 673 + } 674 + if stats.SpotifyTotal != len(tt.spotify) { 675 + t.Errorf("stats.SpotifyTotal = %d, want %d", stats.SpotifyTotal, len(tt.spotify)) 676 + } 677 + }) 678 + } 679 + } 680 + 681 + func TestMergeRecordsSortedByTime(t *testing.T) { 682 + baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 683 + 684 + tests := []struct { 685 + name string 686 + lastfm []PlayRecord 687 + spotify []PlayRecord 688 + tolerance time.Duration 689 + expectedOrder []string 690 + }{ 691 + { 692 + name: "unsorted input sorted by time", 693 + lastfm: []PlayRecord{ 694 + {TrackName: "Later", Artists: []PlayRecordArtist{{ArtistName: "A"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Hour)}, MusicServiceBaseDomain: MusicServiceLastFM}, 695 + {TrackName: "First", Artists: []PlayRecordArtist{{ArtistName: "A"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 696 + }, 697 + spotify: []PlayRecord{ 698 + {TrackName: "Middle", Artists: 
[]PlayRecordArtist{{ArtistName: "A"}}, PlayedTime: Timestamp{Time: baseTime.Add(1 * time.Hour)}, MusicServiceBaseDomain: MusicServiceSpotify}, 699 + }, 700 + tolerance: DefaultCrossSourceTolerance, 701 + expectedOrder: []string{"First", "Middle", "Later"}, 702 + }, 703 + { 704 + name: "same timestamp sorted by track name", 705 + lastfm: []PlayRecord{ 706 + {TrackName: "B Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 707 + {TrackName: "A Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceSpotify}, 708 + }, 709 + spotify: []PlayRecord{}, 710 + tolerance: DefaultCrossSourceTolerance, 711 + expectedOrder: []string{"A Song", "B Song"}, 712 + }, 713 + { 714 + name: "many records out of order", 715 + lastfm: []PlayRecord{ 716 + {TrackName: "Song 5", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(40 * time.Minute)}, MusicServiceBaseDomain: MusicServiceLastFM}, 717 + {TrackName: "Song 1", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 718 + {TrackName: "Song 3", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(20 * time.Minute)}, MusicServiceBaseDomain: MusicServiceLastFM}, 719 + }, 720 + spotify: []PlayRecord{ 721 + {TrackName: "Song 2", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Minute)}, MusicServiceBaseDomain: MusicServiceSpotify}, 722 + {TrackName: "Song 4", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(30 * time.Minute)}, MusicServiceBaseDomain: MusicServiceSpotify}, 723 + }, 724 + tolerance: DefaultCrossSourceTolerance, 725 + expectedOrder: []string{"Song 1", "Song 2", "Song 3", "Song 4", "Song 5"}, 726 
+ }, 727 + } 728 + 729 + for _, tt := range tests { 730 + t.Run(tt.name, func(t *testing.T) { 731 + merged, _ := MergeRecords(tt.lastfm, tt.spotify, tt.tolerance) 732 + 733 + if len(merged) != len(tt.expectedOrder) { 734 + t.Fatalf("len(merged) = %d, want %d", len(merged), len(tt.expectedOrder)) 735 + } 736 + 737 + for i, expected := range tt.expectedOrder { 738 + if merged[i].TrackName != expected { 739 + t.Errorf("merged[%d].TrackName = %q, want %q", i, merged[i].TrackName, expected) 740 + } 741 + } 742 + }) 743 + } 744 + } 745 + 746 + func TestMergeRecordsLastFMPriority(t *testing.T) { 747 + baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 748 + 749 + tests := []struct { 750 + name string 751 + lastfm PlayRecord 752 + spotify PlayRecord 753 + tolerance time.Duration 754 + expectedService string 755 + }{ 756 + { 757 + name: "lastfm wins same timestamp", 758 + lastfm: PlayRecord{ 759 + TrackName: "Song", 760 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 761 + PlayedTime: Timestamp{Time: baseTime}, 762 + MusicServiceBaseDomain: MusicServiceLastFM, 763 + }, 764 + spotify: PlayRecord{ 765 + TrackName: "Song", 766 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 767 + PlayedTime: Timestamp{Time: baseTime}, 768 + MusicServiceBaseDomain: MusicServiceSpotify, 769 + }, 770 + tolerance: DefaultCrossSourceTolerance, 771 + expectedService: MusicServiceLastFM, 772 + }, 773 + { 774 + name: "lastfm wins within tolerance", 775 + lastfm: PlayRecord{ 776 + TrackName: "Song", 777 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 778 + PlayedTime: Timestamp{Time: baseTime}, 779 + MusicServiceBaseDomain: MusicServiceLastFM, 780 + }, 781 + spotify: PlayRecord{ 782 + TrackName: "Song", 783 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 784 + PlayedTime: Timestamp{Time: baseTime.Add(29 * time.Second)}, 785 + MusicServiceBaseDomain: MusicServiceSpotify, 786 + }, 787 + tolerance: DefaultCrossSourceTolerance, 788 + expectedService: MusicServiceLastFM, 
789 + }, 790 + { 791 + name: "lastfm wins even when later", 792 + lastfm: PlayRecord{ 793 + TrackName: "Song", 794 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 795 + PlayedTime: Timestamp{Time: baseTime.Add(31 * time.Second)}, 796 + MusicServiceBaseDomain: MusicServiceLastFM, 797 + }, 798 + spotify: PlayRecord{ 799 + TrackName: "Song", 800 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 801 + PlayedTime: Timestamp{Time: baseTime}, 802 + MusicServiceBaseDomain: MusicServiceSpotify, 803 + }, 804 + tolerance: DefaultCrossSourceTolerance, 805 + expectedService: MusicServiceLastFM, 806 + }, 807 + } 808 + 809 + for _, tt := range tests { 810 + t.Run(tt.name, func(t *testing.T) { 811 + merged, _ := MergeRecords([]PlayRecord{tt.lastfm}, []PlayRecord{tt.spotify}, tt.tolerance) 812 + 813 + if len(merged) != 1 { 814 + t.Fatalf("len(merged) = %d, want 1", len(merged)) 815 + } 816 + if merged[0].MusicServiceBaseDomain != tt.expectedService { 817 + t.Errorf("merged[0].MusicServiceBaseDomain = %q, want %q", merged[0].MusicServiceBaseDomain, tt.expectedService) 818 + } 819 + }) 820 + } 821 + } 822 + 823 + func TestMergeRecordsStats(t *testing.T) { 824 + baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 825 + 826 + tests := []struct { 827 + name string 828 + lastfm []PlayRecord 829 + spotify []PlayRecord 830 + tolerance time.Duration 831 + expectedLastFMTotal int 832 + expectedSpotifyTotal int 833 + expectedMergedTotal int 834 + }{ 835 + { 836 + name: "all unique", 837 + lastfm: []PlayRecord{{TrackName: "A", Artists: []PlayRecordArtist{{ArtistName: "X"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}}, 838 + spotify: []PlayRecord{{TrackName: "B", Artists: []PlayRecordArtist{{ArtistName: "Y"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Hour)}, MusicServiceBaseDomain: MusicServiceSpotify}}, 839 + tolerance: DefaultCrossSourceTolerance, 840 + expectedLastFMTotal: 1, 841 + expectedSpotifyTotal: 1, 842 + 
expectedMergedTotal: 2, 843 + }, 844 + { 845 + name: "all duplicates", 846 + lastfm: []PlayRecord{{TrackName: "A", Artists: []PlayRecordArtist{{ArtistName: "X"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}}, 847 + spotify: []PlayRecord{{TrackName: "A", Artists: []PlayRecordArtist{{ArtistName: "X"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceSpotify}}, 848 + tolerance: DefaultCrossSourceTolerance, 849 + expectedLastFMTotal: 1, 850 + expectedSpotifyTotal: 1, 851 + expectedMergedTotal: 1, 852 + }, 853 + { 854 + name: "mixed", 855 + lastfm: []PlayRecord{ 856 + {TrackName: "A", Artists: []PlayRecordArtist{{ArtistName: "X"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 857 + {TrackName: "B", Artists: []PlayRecordArtist{{ArtistName: "Y"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Hour)}, MusicServiceBaseDomain: MusicServiceLastFM}, 858 + }, 859 + spotify: []PlayRecord{ 860 + {TrackName: "A", Artists: []PlayRecordArtist{{ArtistName: "X"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceSpotify}, 861 + {TrackName: "C", Artists: []PlayRecordArtist{{ArtistName: "Z"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Hour)}, MusicServiceBaseDomain: MusicServiceSpotify}, 862 + }, 863 + tolerance: DefaultCrossSourceTolerance, 864 + expectedLastFMTotal: 2, 865 + expectedSpotifyTotal: 2, 866 + expectedMergedTotal: 3, 867 + }, 868 + } 869 + 870 + for _, tt := range tests { 871 + t.Run(tt.name, func(t *testing.T) { 872 + _, stats := MergeRecords(tt.lastfm, tt.spotify, tt.tolerance) 873 + 874 + if stats.LastFMTotal != tt.expectedLastFMTotal { 875 + t.Errorf("stats.LastFMTotal = %d, want %d", stats.LastFMTotal, tt.expectedLastFMTotal) 876 + } 877 + if stats.SpotifyTotal != tt.expectedSpotifyTotal { 878 + t.Errorf("stats.SpotifyTotal = %d, want %d", stats.SpotifyTotal, tt.expectedSpotifyTotal) 879 + } 880 + if stats.MergedTotal != 
tt.expectedMergedTotal { 881 + t.Errorf("stats.MergedTotal = %d, want %d", stats.MergedTotal, tt.expectedMergedTotal) 882 + } 883 + }) 884 + } 885 + }
+328
sync/sync_test.go
··· 1 + package sync 2 + 3 + import ( 4 + "fmt" 5 + "testing" 6 + "time" 7 + ) 8 + 9 + func TestPrepareWrites(t *testing.T) { 10 + baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 11 + 12 + tests := []struct { 13 + name string 14 + records []PlayRecord 15 + expectedWrites int 16 + expectUnique bool 17 + }{ 18 + { 19 + name: "empty records returns nil", 20 + records: []PlayRecord{}, 21 + expectedWrites: 0, 22 + }, 23 + { 24 + name: "single record", 25 + records: []PlayRecord{ 26 + { 27 + TrackName: "Song A", 28 + Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, 29 + PlayedTime: Timestamp{Time: baseTime}, 30 + }, 31 + }, 32 + expectedWrites: 1, 33 + expectUnique: true, 34 + }, 35 + { 36 + name: "multiple records same timestamp", 37 + records: []PlayRecord{ 38 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}, 39 + {TrackName: "Song B", Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, PlayedTime: Timestamp{Time: baseTime}}, 40 + {TrackName: "Song C", Artists: []PlayRecordArtist{{ArtistName: "Artist C"}}, PlayedTime: Timestamp{Time: baseTime}}, 41 + }, 42 + expectedWrites: 3, 43 + expectUnique: true, 44 + }, 45 + { 46 + name: "mixed timestamps", 47 + records: []PlayRecord{ 48 + {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, PlayedTime: Timestamp{Time: baseTime}}, 49 + {TrackName: "Song B", Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Second)}}, 50 + {TrackName: "Song C", Artists: []PlayRecordArtist{{ArtistName: "Artist C"}}, PlayedTime: Timestamp{Time: baseTime}}, 51 + {TrackName: "Song D", Artists: []PlayRecordArtist{{ArtistName: "Artist D"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Second)}}, 52 + {TrackName: "Song E", Artists: []PlayRecordArtist{{ArtistName: "Artist E"}}, PlayedTime: Timestamp{Time: baseTime}}, 53 + }, 54 + expectedWrites: 5, 55 + expectUnique: true, 56 + }, 57 + } 58 
+ 59 + for _, tt := range tests { 60 + t.Run(tt.name, func(t *testing.T) { 61 + writes, err := prepareWrites(tt.records, RecordType) 62 + if err != nil { 63 + t.Fatalf("PrepareWrites() error = %v", err) 64 + } 65 + 66 + if tt.expectedWrites == 0 { 67 + if writes != nil { 68 + t.Errorf("PrepareWrites() = %v, want nil", writes) 69 + } 70 + return 71 + } 72 + 73 + if len(writes) != tt.expectedWrites { 74 + t.Errorf("len(writes) = %d, want %d", len(writes), tt.expectedWrites) 75 + } 76 + 77 + if tt.expectUnique { 78 + rkeys := make(map[string]bool) 79 + for _, w := range writes { 80 + rkey := w["rkey"].(string) 81 + if rkeys[rkey] { 82 + t.Errorf("duplicate rkey generated: %s", rkey) 83 + } 84 + rkeys[rkey] = true 85 + } 86 + if len(rkeys) != len(writes) { 87 + t.Errorf("got %d unique rkeys, want %d", len(rkeys), len(writes)) 88 + } 89 + } 90 + }) 91 + } 92 + } 93 + 94 + func TestPrepareWritesManyCollisions(t *testing.T) { 95 + baseTime := time.Date(2024, 1, 15, 10, 0, 0, 123456789, time.UTC) 96 + 97 + numRecords := 50 98 + records := make([]PlayRecord, numRecords) 99 + for i := range numRecords { 100 + records[i] = PlayRecord{ 101 + TrackName: "Song", 102 + Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 103 + PlayedTime: Timestamp{Time: baseTime}, 104 + } 105 + } 106 + 107 + writes, err := prepareWrites(records, RecordType) 108 + if err != nil { 109 + t.Fatalf("prepareWrites() error = %v", err) 110 + } 111 + 112 + if len(writes) != numRecords { 113 + t.Errorf("len(writes) = %d, want %d", len(writes), numRecords) 114 + } 115 + 116 + rkeys := make(map[string]bool) 117 + for _, w := range writes { 118 + rkey := w["rkey"].(string) 119 + if rkeys[rkey] { 120 + t.Errorf("duplicate rkey generated: %s", rkey) 121 + } 122 + rkeys[rkey] = true 123 + } 124 + 125 + if len(rkeys) != numRecords { 126 + t.Errorf("got %d unique rkeys, want %d", len(rkeys), numRecords) 127 + } 128 + } 129 + 130 + func TestFilterNewExcludesExisting(t *testing.T) { 131 + records := []PlayRecord{ 
132 + { 133 + TrackName: "Song A", 134 + Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, 135 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC)}, 136 + }, 137 + { 138 + TrackName: "Song B", 139 + Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, 140 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 1, 0, 0, time.UTC)}, 141 + }, 142 + { 143 + TrackName: "Song C", 144 + Artists: []PlayRecordArtist{{ArtistName: "Artist C"}}, 145 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 2, 0, 0, time.UTC)}, 146 + }, 147 + } 148 + 149 + existing := []ExistingRecord{ 150 + { 151 + URI: "at://did:example:user/fm.teal.alpha.feed.play/abc123", 152 + CID: "bafyreabc123", 153 + Value: PlayRecord{ 154 + TrackName: "Song B", 155 + Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, 156 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 1, 0, 0, time.UTC)}, 157 + }, 158 + }, 159 + } 160 + 161 + newRecords := FilterNew(records, existing, nil) 162 + 163 + if len(newRecords) != 2 { 164 + t.Errorf("len(newRecords) = %d, want 2", len(newRecords)) 165 + } 166 + 167 + foundSongA := false 168 + foundSongB := false 169 + foundSongC := false 170 + for _, rec := range newRecords { 171 + switch rec.TrackName { 172 + case "Song A": 173 + foundSongA = true 174 + case "Song B": 175 + foundSongB = true 176 + case "Song C": 177 + foundSongC = true 178 + } 179 + } 180 + 181 + if !foundSongA { 182 + t.Error("Song A should be in new records") 183 + } 184 + if foundSongB { 185 + t.Error("Song B should not be in new records (it exists)") 186 + } 187 + if !foundSongC { 188 + t.Error("Song C should be in new records") 189 + } 190 + } 191 + 192 + func TestFilterNewReturnsAllWhenNoneExist(t *testing.T) { 193 + records := []PlayRecord{ 194 + { 195 + TrackName: "Song A", 196 + Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, 197 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC)}, 198 + }, 199 + { 200 + TrackName: "Song B", 
201 + Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, 202 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 1, 0, 0, time.UTC)}, 203 + }, 204 + } 205 + 206 + existing := []ExistingRecord{} 207 + 208 + newRecords := FilterNew(records, existing, nil) 209 + 210 + if len(newRecords) != 2 { 211 + t.Errorf("len(newRecords) = %d, want 2", len(newRecords)) 212 + } 213 + } 214 + 215 + func TestFindDuplicates(t *testing.T) { 216 + tests := []struct { 217 + name string 218 + records []ExistingRecord 219 + expectedDuplicateCount int 220 + }{ 221 + { 222 + name: "finds duplicates", 223 + records: []ExistingRecord{ 224 + { 225 + URI: "at://did:example:user/fm.teal.alpha.feed.play/abc123", 226 + CID: "bafyreabc123", 227 + Value: PlayRecord{ 228 + TrackName: "Same Song", 229 + Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, 230 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC)}, 231 + }, 232 + }, 233 + { 234 + URI: "at://did:example:user/fm.teal.alpha.feed.play/def456", 235 + CID: "bafyreedef456", 236 + Value: PlayRecord{ 237 + TrackName: "Same Song", 238 + Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, 239 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC)}, 240 + }, 241 + }, 242 + { 243 + URI: "at://did:example:user/fm.teal.alpha.feed.play/ghi789", 244 + CID: "bafyreghi789", 245 + Value: PlayRecord{ 246 + TrackName: "Different Song", 247 + Artists: []PlayRecordArtist{{ArtistName: "Different Artist"}}, 248 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 11, 0, 0, 0, time.UTC)}, 249 + }, 250 + }, 251 + }, 252 + expectedDuplicateCount: 1, 253 + }, 254 + { 255 + name: "returns empty for no duplicates", 256 + records: []ExistingRecord{ 257 + { 258 + URI: "at://did:example:user/fm.teal.alpha.feed.play/abc123", 259 + CID: "bafyreabc123", 260 + Value: PlayRecord{ 261 + TrackName: "Song A", 262 + Artists: []PlayRecordArtist{{ArtistName: "Artist A"}}, 263 + PlayedTime: Timestamp{Time: 
time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC)}, 264 + }, 265 + }, 266 + { 267 + URI: "at://did:example:user/fm.teal.alpha.feed.play/def456", 268 + CID: "bafyreedef456", 269 + Value: PlayRecord{ 270 + TrackName: "Song B", 271 + Artists: []PlayRecordArtist{{ArtistName: "Artist B"}}, 272 + PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 11, 0, 0, 0, time.UTC)}, 273 + }, 274 + }, 275 + }, 276 + expectedDuplicateCount: 0, 277 + }, 278 + { 279 + name: "handles empty slice", 280 + records: []ExistingRecord{}, 281 + expectedDuplicateCount: 0, 282 + }, 283 + } 284 + 285 + for _, tt := range tests { 286 + t.Run(tt.name, func(t *testing.T) { 287 + duplicates := FindDuplicates(tt.records) 288 + 289 + if len(duplicates) != tt.expectedDuplicateCount { 290 + t.Errorf("len(duplicates) = %d, want %d", len(duplicates), tt.expectedDuplicateCount) 291 + } 292 + 293 + for key, group := range duplicates { 294 + if len(group) < 2 { 295 + t.Errorf("group for key %q should have 2+ records, got %d", key, len(group)) 296 + } 297 + } 298 + }) 299 + } 300 + } 301 + 302 + func TestBackoff(t *testing.T) { 303 + tests := []struct { 304 + attempt int 305 + min time.Duration 306 + max time.Duration 307 + }{ 308 + {0, BaseRetryDelay, BaseRetryDelay + (BaseRetryDelay / 4)}, 309 + {1, BaseRetryDelay, BaseRetryDelay + (BaseRetryDelay / 4)}, 310 + {2, BaseRetryDelay * 2, BaseRetryDelay*2 + (BaseRetryDelay * 2 / 4)}, 311 + {3, BaseRetryDelay * 4, BaseRetryDelay*4 + (BaseRetryDelay * 4 / 4)}, 312 + {10, MaxRetryDelay, MaxRetryDelay + (MaxRetryDelay / 4)}, 313 + {100, MaxRetryDelay, MaxRetryDelay + (MaxRetryDelay / 4)}, 314 + } 315 + 316 + for _, tt := range tests { 317 + t.Run(fmt.Sprintf("attempt %d", tt.attempt), func(t *testing.T) { 318 + d := backoff(tt.attempt) 319 + if d < tt.min { 320 + t.Errorf("backoff(%d) = %v, want >= %v", tt.attempt, d, tt.min) 321 + } 322 + // Allow for jitter in the max check 323 + if d > tt.max { 324 + t.Errorf("backoff(%d) = %v, want <= %v", tt.attempt, d, tt.max) 325 + 
} 326 + }) 327 + } 328 + }
+35
sync/timestamp.go
// Timestamp wraps time.Time so that it is written to JSON as a quoted
// RFC 3339 string with nanosecond precision.
type Timestamp struct {
	time.Time
}

// MarshalJSON encodes the timestamp as a quoted string in time.RFC3339Nano
// format. It never returns an error.
func (t Timestamp) MarshalJSON() ([]byte, error) {
	return []byte(`"` + t.Format(time.RFC3339Nano) + `"`), nil
}

// UnmarshalJSON decodes a quoted RFC 3339 timestamp into t.
//
// The JSON literal null resets t to the zero Timestamp. Any other input must
// be a double-quoted string; it is parsed as time.RFC3339Nano first and as
// time.RFC3339 as a fallback. Input that is not quoted, or whose contents do
// not parse, yields an error and leaves t unchanged.
func (t *Timestamp) UnmarshalJSON(data []byte) error {
	if string(data) == "null" {
		*t = Timestamp{}
		return nil
	}

	// Validate the quoting before stripping it: slicing off the first and last
	// byte of an input shorter than two bytes (empty input, the number 1, a
	// lone quote) would panic, and unquoted input must not be parsed as if it
	// were a string.
	if len(data) < 2 || data[0] != '"' || data[len(data)-1] != '"' {
		return fmt.Errorf("failed to parse timestamp %q: not a JSON string", data)
	}
	s := string(data[1 : len(data)-1])

	tm, err := time.Parse(time.RFC3339Nano, s)
	if err != nil {
		tm, err = time.Parse(time.RFC3339, s)
		if err != nil {
			return fmt.Errorf("failed to parse timestamp %q: %w", s, err)
		}
	}

	*t = Timestamp{Time: tm}
	return nil
}