···21212222### Commands
23232424-| Command | Usage |
2525-| :--- | :--- |
2424+| Command | Usage |
2525+| :------- | :------------------------------------------------------- |
2626| `export` | Parse and merge Last.fm/Spotify exports into a JSON file |
2727-| `import` | Import new records to Bluesky (auto-skips existing) |
2828-| `sync` | Refresh the local cache with records from Bluesky |
2929-| `stats` | Show database status and daily rate limit consumption |
3030-| `failed` | List records that failed to import |
3131-| `retry` | Attempt to re-import failed records |
3232-| `dedupe` | Remove duplicate records from your Bluesky profile |
3333-| `debug` | Dump raw records from Bluesky for troubleshooting |
2727+| `import` | Import new records to Bluesky (auto-skips existing) |
2828+| `sync` | Refresh the local cache with records from Bluesky |
2929+| `stats` | Show database status and daily rate limit consumption |
3030+| `failed` | List records that failed to import |
3131+| `retry` | Attempt to re-import failed records |
3232+| `dedupe` | Remove duplicate records from your Bluesky profile |
3333+| `debug` | Dump raw records from Bluesky for troubleshooting |
34343535### Advanced Options
3636···41414242## Environment Variables
43434444-| Variable | Description |
4545-| ----------------- | -------------------------------------------- |
4646-| `LAZULI_HANDLE` | Bluesky handle (e.g., `user.bsky.social`) |
4747-| `LAZULI_PASSWORD` | Bluesky app password |
4848-| `LAZULI_LASTFM` | Path to Last.fm CSV file |
4949-| `LAZULI_SPOTIFY` | Path to Spotify JSON file/directory/zip |
5050-| `LAZULI_MODE` | Import mode: `lastfm`, `spotify`, `combined` |
5151-| `LAZULI_DRY_RUN` | Preview without publishing |
5252-| `LAZULI_VERBOSE` | Enable verbose logging |
5353-| `LAZULI_REVERSE` | Process records in reverse order |
4444+| Variable | Description |
4545+| ----------------- | ----------------------------------------- |
4646+| `LAZULI_HANDLE` | Bluesky handle (e.g., `user.bsky.social`) |
4747+| `LAZULI_PASSWORD` | Bluesky app password |
4848+| `LAZULI_LASTFM` | Path to Last.fm CSV file |
4949+| `LAZULI_SPOTIFY` | Path to Spotify JSON file/directory/zip |
5050+| `LAZULI_DRY_RUN` | Preview without publishing |
5151+| `LAZULI_VERBOSE` | Enable verbose logging |
5252+| `LAZULI_REVERSE` | Process records in reverse order |
54535554## Input Formats
5655···7170The recommended way to use Spotify data is by passing the **ZIP archive** you receive from Spotify directly. Lazuli will automatically find and parse all streaming history files within it.
72717372Lazuli accepts:
7373+7474- **ZIP archives** containing extended history (Recommended)
7575- Directories containing `Streaming_History_Audio_*.json` files
7676- Single `Streaming_History_Audio_*.json` files
+96
kway/merge.go
···11+package kway
22+33+import (
44+ "container/heap"
55+ "time"
66+)
// Mergeable is the constraint that element types must satisfy to be passed to Merge.
type Mergeable[T any] interface {
	// Time returns the timestamp used to order items and to decide when an
	// already-seen item has left the duplicate-detection window.
	Time() time.Time
	// IsDuplicate reports whether the receiver and other describe the same
	// item given the tolerance tol. isMatch is true for a duplicate; when it
	// is, preferThis tells Merge to keep the receiver in place of other.
	IsDuplicate(other T, tol time.Duration) (isMatch bool, preferThis bool)
}

// heapItem is one pending element in the merge heap, together with the source
// slice it came from and its index within that slice.
type heapItem[T Mergeable[T]] struct {
	Value      T
	SourceIdx  int
	ElementIdx int
}

// mergeHeap is a min-heap of pending items ordered by Time(). Items with equal
// timestamps are ordered by SourceIdx so that ties are resolved in source
// order rather than by the heap's internal layout.
type mergeHeap[T Mergeable[T]] []heapItem[T]

// Len implements heap.Interface.
func (h mergeHeap[T]) Len() int { return len(h) }

// Swap implements heap.Interface.
func (h mergeHeap[T]) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

// Less implements heap.Interface: earlier timestamps first, lower source index
// first on equal timestamps.
func (h mergeHeap[T]) Less(i, j int) bool {
	ti, tj := h[i].Value.Time(), h[j].Value.Time()
	if !ti.Equal(tj) {
		return ti.Before(tj)
	}
	return h[i].SourceIdx < h[j].SourceIdx
}

// Push implements heap.Interface. x must be a heapItem[T].
func (h *mergeHeap[T]) Push(x any) { *h = append(*h, x.(heapItem[T])) }

// Pop implements heap.Interface, removing and returning the last element.
func (h *mergeHeap[T]) Pop() any {
	old := *h
	n := len(old)
	item := old[n-1]
	// Clear the vacated slot so the backing array does not keep the value alive.
	old[n-1] = heapItem[T]{}
	*h = old[:n-1]
	return item
}

// Merge performs a k-way merge of sources, each of which must already be
// sorted by Time() in ascending order, and drops duplicates.
//
// An incoming item is compared (via IsDuplicate) against every previously kept
// item whose timestamp is at most tolerance older than its own. On the first
// match the incoming item is either discarded or, when IsDuplicate reports
// preferThis, kept instead of the matched item. A replacement is stored at the
// incoming item's own chronological position, so the returned slice stays
// sorted by Time(). Items with equal timestamps from different sources are
// emitted in source order.
//
// The returned slice is never nil.
func Merge[T Mergeable[T]](sources [][]T, tolerance time.Duration) []T {
	// Seed the heap with the head of every non-empty source and heapify once.
	total := 0
	h := make(mergeHeap[T], 0, len(sources))
	for i, src := range sources {
		total += len(src)
		if len(src) > 0 {
			h = append(h, heapItem[T]{Value: src[0], SourceIdx: i, ElementIdx: 0})
		}
	}
	heap.Init(&h)

	// result[winStart:] is the duplicate-detection window: the kept items
	// that are still within tolerance of the item being processed.
	result := make([]T, 0, total)
	winStart := 0

	for h.Len() > 0 {
		curr := heap.Pop(&h).(heapItem[T])

		// Refill the heap from the source the popped item came from.
		if next := curr.ElementIdx + 1; next < len(sources[curr.SourceIdx]) {
			heap.Push(&h, heapItem[T]{
				Value:      sources[curr.SourceIdx][next],
				SourceIdx:  curr.SourceIdx,
				ElementIdx: next,
			})
		}

		item := curr.Value
		now := item.Time()

		// Slide the window past items that are too old to match anymore.
		for winStart < len(result) && now.Sub(result[winStart].Time()) > tolerance {
			winStart++
		}

		keep := true
		for i := winStart; i < len(result); i++ {
			isMatch, preferCurr := item.IsDuplicate(result[i], tolerance)
			if !isMatch {
				continue
			}
			if preferCurr {
				// Remove the superseded item; the preferred one is appended
				// below. Overwriting in place would put a newer timestamp in
				// front of older window entries and break the ordering.
				result = append(result[:i], result[i+1:]...)
			} else {
				keep = false
			}
			break
		}

		if keep {
			result = append(result, item)
		}
	}

	return result
}
+122
kway/merge_test.go
···11+package kway
22+33+import (
44+ "testing"
55+ "time"
66+)
// TestPlayRecord is a minimal play record used as the Mergeable element type
// in the tests of this package.
type TestPlayRecord struct {
	TrackName string
	Artist    string
	time      time.Time
	Source    string // "lastfm" or "spotify"
	HasMBID   bool
}

// IsDuplicate pairs SameAs (is it the same play?) with BetterThan (should the
// receiver win?). Both values are computed regardless of each other.
func (r TestPlayRecord) IsDuplicate(other TestPlayRecord, tolerance time.Duration) (bool, bool) {
	isMatch := r.SameAs(other, tolerance)
	preferThis := r.BetterThan(other)
	return isMatch, preferThis
}

// SameAs reports whether both records name the same track and artist and
// their timestamps differ by no more than tolerance in either direction.
func (r TestPlayRecord) SameAs(other TestPlayRecord, tolerance time.Duration) bool {
	if r.TrackName != other.TrackName || r.Artist != other.Artist {
		return false
	}

	gap := r.time.Sub(other.time)
	if gap < 0 {
		gap = -gap
	}
	return gap <= tolerance
}

// BetterThan reports whether the receiver should be kept over other: a
// "lastfm" record beats any non-"lastfm" record; when both or neither are
// "lastfm", a record with an MBID beats one without.
func (r TestPlayRecord) BetterThan(other TestPlayRecord) bool {
	mine, theirs := r.Source == "lastfm", other.Source == "lastfm"
	if mine != theirs {
		return mine
	}
	return r.HasMBID && !other.HasMBID
}

// Time returns the record's timestamp.
func (r TestPlayRecord) Time() time.Time {
	return r.time
}
4949+5050+func TestMerge(t *testing.T) {
5151+ baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC)
5252+5353+ // Test case where duplicates are very close in time and should be merged
5454+ lastfmRecords := []TestPlayRecord{
5555+ {TrackName: "Song A", Artist: "Artist", time: baseTime, Source: "lastfm"},
5656+ {TrackName: "Song D", Artist: "Artist", time: baseTime.Add(3 * time.Hour), Source: "lastfm"},
5757+ }
5858+5959+ spotifyRecords := []TestPlayRecord{
6060+ {TrackName: "Song A", Artist: "Artist", time: baseTime.Add(3 * time.Minute), Source: "spotify"}, // Duplicate within tolerance
6161+ {TrackName: "Song B", Artist: "Artist", time: baseTime.Add(time.Hour), Source: "spotify"},
6262+ {TrackName: "Song C", Artist: "Artist", time: baseTime.Add(2 * time.Hour), Source: "spotify"},
6363+ }
6464+6565+ result := Merge([][]TestPlayRecord{lastfmRecords, spotifyRecords}, 10*time.Minute)
6666+6767+ if len(result) != 4 {
6868+ t.Errorf("Expected 4 results, got %d", len(result))
6969+ }
7070+7171+ // Check order - should be sorted by time
7272+ expectedOrder := []string{"Song A", "Song B", "Song C", "Song D"}
7373+ for i, expected := range expectedOrder {
7474+ if i >= len(result) {
7575+ t.Errorf("Missing result at position %d", i)
7676+ break
7777+ }
7878+ if result[i].TrackName != expected {
7979+ t.Errorf("Result %d should be %s, got %s", i, expected, result[i].TrackName)
8080+ }
8181+ }
8282+8383+ // Check that LastFM version is preferred for duplicate Song A
8484+ if result[0].Source != "lastfm" {
8585+ t.Errorf("Duplicate Song A should be from lastfm, got %s", result[0].Source)
8686+ }
8787+}
8888+8989+func TestMergeExactDuplicate(t *testing.T) {
9090+ baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC)
9191+9292+ lastfmRecords := []TestPlayRecord{
9393+ {TrackName: "Song A", Artist: "Artist", time: baseTime, Source: "lastfm"},
9494+ }
9595+9696+ spotifyRecords := []TestPlayRecord{
9797+ {TrackName: "Song A", Artist: "Artist", time: baseTime, Source: "spotify"}, // Exact duplicate
9898+ }
9999+100100+ result := Merge([][]TestPlayRecord{lastfmRecords, spotifyRecords}, 0)
101101+102102+ if len(result) != 1 {
103103+ t.Errorf("Expected 1 result, got %d", len(result))
104104+ }
105105+106106+ // Check that LastFM version is preferred
107107+ if result[0].Source != "lastfm" {
108108+ t.Errorf("Duplicate should be from lastfm, got %s", result[0].Source)
109109+ }
110110+}
111111+112112+func TestMergeEmptySources(t *testing.T) {
113113+ result := Merge([][]TestPlayRecord{}, 0)
114114+ if len(result) != 0 {
115115+ t.Errorf("Expected empty result, got %d items", len(result))
116116+ }
117117+118118+ result = Merge([][]TestPlayRecord{{}, {}}, 0)
119119+ if len(result) != 0 {
120120+ t.Errorf("Expected empty result from empty sources, got %d items", len(result))
121121+ }
122122+}