A Wrapped/Replay-like recap for teal.fm and rocksky.app (currently on hiatus)
3
fork

Configure Feed

Select the types of activity you want to include in your feed.

scrobble speed improvements?

Mia bfa3862b c347c57d

+307 -149
+18 -49
Cargo.lock
··· 1111 1111 1112 1112 [[package]] 1113 1113 name = "duckdb" 1114 - version = "1.4.1" 1114 + version = "1.4.2" 1115 1115 source = "registry+https://github.com/rust-lang/crates.io-index" 1116 - checksum = "2a093eed1c714143b257b95fa323e38527fabf05fbf02bb0d5d2045275ffdaef" 1116 + checksum = "e46d5568337ee1f7ea8779e1d9aa2eafcdf156458713ce65afb246c5d2cf5850" 1117 1117 dependencies = [ 1118 1118 "arrow", 1119 1119 "cast", ··· 1203 1203 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" 1204 1204 dependencies = [ 1205 1205 "libc", 1206 - "windows-sys 0.61.2", 1206 + "windows-sys 0.52.0", 1207 1207 ] 1208 1208 1209 1209 [[package]] ··· 1889 1889 "js-sys", 1890 1890 "log", 1891 1891 "wasm-bindgen", 1892 - "windows-core 0.62.2", 1892 + "windows-core", 1893 1893 ] 1894 1894 1895 1895 [[package]] ··· 2479 2479 2480 2480 [[package]] 2481 2481 name = "libduckdb-sys" 2482 - version = "1.4.1" 2482 + version = "1.4.2" 2483 2483 source = "registry+https://github.com/rust-lang/crates.io-index" 2484 - checksum = "4b93c3ff279601516f01531cadf2ccba50394fbb5f7bf685c6e6b9b07c8dca6f" 2484 + checksum = "6650a7ea86fce24fe1fbf5b037671a8b77c59d135703fc6085b8a1827e66e977" 2485 2485 dependencies = [ 2486 2486 "cc", 2487 2487 "flate2", ··· 3383 3383 "once_cell", 3384 3384 "socket2 0.6.1", 3385 3385 "tracing", 3386 - "windows-sys 0.60.2", 3386 + "windows-sys 0.52.0", 3387 3387 ] 3388 3388 3389 3389 [[package]] ··· 3733 3733 "errno", 3734 3734 "libc", 3735 3735 "linux-raw-sys", 3736 - "windows-sys 0.61.2", 3736 + "windows-sys 0.52.0", 3737 3737 ] 3738 3738 3739 3739 [[package]] ··· 4437 4437 "getrandom 0.3.4", 4438 4438 "once_cell", 4439 4439 "rustix", 4440 - "windows-sys 0.61.2", 4440 + "windows-sys 0.52.0", 4441 4441 ] 4442 4442 4443 4443 [[package]] ··· 5148 5148 source = "registry+https://github.com/rust-lang/crates.io-index" 5149 5149 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" 5150 5150 dependencies = [ 5151 - "windows-sys 0.61.2", 
5151 + "windows-sys 0.48.0", 5152 5152 ] 5153 5153 5154 5154 [[package]] ··· 5164 5164 checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" 5165 5165 dependencies = [ 5166 5166 "windows-collections", 5167 - "windows-core 0.61.2", 5167 + "windows-core", 5168 5168 "windows-future", 5169 5169 "windows-link 0.1.3", 5170 5170 "windows-numerics", ··· 5176 5176 source = "registry+https://github.com/rust-lang/crates.io-index" 5177 5177 checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" 5178 5178 dependencies = [ 5179 - "windows-core 0.61.2", 5179 + "windows-core", 5180 5180 ] 5181 5181 5182 5182 [[package]] ··· 5188 5188 "windows-implement", 5189 5189 "windows-interface", 5190 5190 "windows-link 0.1.3", 5191 - "windows-result 0.3.4", 5192 - "windows-strings 0.4.2", 5193 - ] 5194 - 5195 - [[package]] 5196 - name = "windows-core" 5197 - version = "0.62.2" 5198 - source = "registry+https://github.com/rust-lang/crates.io-index" 5199 - checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" 5200 - dependencies = [ 5201 - "windows-implement", 5202 - "windows-interface", 5203 - "windows-link 0.2.1", 5204 - "windows-result 0.4.1", 5205 - "windows-strings 0.5.1", 5191 + "windows-result", 5192 + "windows-strings", 5206 5193 ] 5207 5194 5208 5195 [[package]] ··· 5211 5198 source = "registry+https://github.com/rust-lang/crates.io-index" 5212 5199 checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" 5213 5200 dependencies = [ 5214 - "windows-core 0.61.2", 5201 + "windows-core", 5215 5202 "windows-link 0.1.3", 5216 5203 "windows-threading", 5217 5204 ] ··· 5256 5243 source = "registry+https://github.com/rust-lang/crates.io-index" 5257 5244 checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" 5258 5245 dependencies = [ 5259 - "windows-core 0.61.2", 5246 + "windows-core", 5260 5247 "windows-link 0.1.3", 5261 5248 ] 5262 5249 ··· 5267 5254 checksum = 
"5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" 5268 5255 dependencies = [ 5269 5256 "windows-link 0.1.3", 5270 - "windows-result 0.3.4", 5271 - "windows-strings 0.4.2", 5257 + "windows-result", 5258 + "windows-strings", 5272 5259 ] 5273 5260 5274 5261 [[package]] ··· 5281 5268 ] 5282 5269 5283 5270 [[package]] 5284 - name = "windows-result" 5285 - version = "0.4.1" 5286 - source = "registry+https://github.com/rust-lang/crates.io-index" 5287 - checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" 5288 - dependencies = [ 5289 - "windows-link 0.2.1", 5290 - ] 5291 - 5292 - [[package]] 5293 5271 name = "windows-strings" 5294 5272 version = "0.4.2" 5295 5273 source = "registry+https://github.com/rust-lang/crates.io-index" 5296 5274 checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" 5297 5275 dependencies = [ 5298 5276 "windows-link 0.1.3", 5299 - ] 5300 - 5301 - [[package]] 5302 - name = "windows-strings" 5303 - version = "0.5.1" 5304 - source = "registry+https://github.com/rust-lang/crates.io-index" 5305 - checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" 5306 - dependencies = [ 5307 - "windows-link 0.2.1", 5308 5277 ] 5309 5278 5310 5279 [[package]]
+4 -1
src/ingest/scrobbles.rs
··· 30 30 .map(|v| v.artist_name.as_str()); 31 31 32 32 let find = FindMbzData { 33 + track_name: scrobble.track_name.as_str(), 33 34 release_name: null_if_empty(scrobble.release_name.as_deref()), 34 35 release_mbid: null_if_empty(scrobble.release_mb_id.as_deref()), 35 36 release_discrim: scrobble.release_discriminant.as_deref(), 36 37 recording_mbid: null_if_empty(scrobble.recording_mb_id.as_deref()), 38 + isrc: null_if_empty(scrobble.isrc.as_deref()), 37 39 artist_name: artist, 38 40 track_discrim: scrobble.track_discriminant.as_deref(), 39 41 }; ··· 59 61 } 60 62 61 63 let find = FindMbzData { 64 + track_name: &scrobble.title, 62 65 release_name: Some(&scrobble.album), 63 66 release_mbid: None, 64 67 recording_mbid: None, ··· 79 82 find: FindMbzData<'_>, 80 83 created: DateTime<Utc>, 81 84 ) -> duckdb::Result<()> { 82 - if let Some(data) = try_find_mbz_data(conn, track, &find)? { 85 + if let Some(data) = try_find_mbz_data(conn, &find)? { 83 86 conn.execute( 84 87 r"INSERT INTO scrobbles (did, rkey, track_name, track_mbid, release_name, 85 88 release_mbid, release_group_name, release_group_mbid, artists, created_at, debug)
+60
src/mbz/find_by_fts.sql
-- Name-based (full-text search) lookup of a track in the MusicBrainz tables.
--
-- Parameters:
--   $1  release name          $2  release MBID (nullable)
--   $3  track/recording name  $4  recording MBID (nullable)
--   $5  artist name
--
-- When an MBID is supplied, the matching CTE filters on `gid` instead of the
-- full-text score. Result column order is significant: callers read the
-- columns by position.
WITH
    releases AS (
        SELECT *, mbz.fts_release($1, $2) AS rel_score
        FROM mbz.release
        WHERE
            ($2 IS NULL AND rel_score IS NOT NULL AND rel_score > 5)
            OR ($2 IS NOT NULL AND release.gid = $2)
    ),
    recordings AS (
        SELECT *, mbz.fts_recording($3, $4) AS rec_score
        FROM mbz.recording
        WHERE
            ($4 IS NULL AND rec_score IS NOT NULL AND rec_score > 5)
            OR ($4 IS NOT NULL AND recording.gid = $4)
    ),
    artists AS (
        -- The second argument is the "trigger": a non-NULL MBID makes the
        -- macro return a constant score instead of running the text match.
        SELECT *, mbz.fts_artists($5, coalesce($2, $4)) AS artist_score
        FROM mbz.artist_credit
        WHERE artist_score IS NOT NULL
          AND artist_score > 2.5
    )
SELECT
    track.name                 AS track_name,
    track.gid                  AS track_gid,
    releases.gid               AS release_gid,
    releases.name              AS release_name,
    recordings.gid             AS recording_gid,
    release_group.gid          AS release_grp_gid,
    release_group.name         AS release_grp_name,
    artists.name               AS artists,
    rel_score,
    rec_score,
    coalesce(artist_score, 0)  AS artist_score
FROM mbz.track
INNER JOIN recordings ON recordings.id = track.recording
INNER JOIN mbz.medium ON medium.id = track.medium
INNER JOIN releases ON releases.id = medium.release
INNER JOIN mbz.release_group ON release_group.id = releases.release_group
LEFT JOIN artists ON artists.id = recordings.artist_credit
WHERE is_data_track = false
-- Best candidates first.
ORDER BY
    rel_score DESC,
    rec_score DESC,
    artist_score DESC
LIMIT 10;
+42
src/mbz/find_by_isrc.sql
-- ISRC-based lookup of a track in the MusicBrainz tables.
--
-- Parameters:
--   $1  release name (nullable)
--   $2  release MBID (nullable)
--   $3  ISRC
--
-- The recording is pinned by ISRC, so its score and the artist score are
-- reported as the constant 10; only the release is scored. Result column
-- order is significant: callers read the columns by position.
--
-- NOTE(review): when both $1 and $2 are NULL the release score is presumably
-- NULL, no release qualifies, and the lookup returns no rows even if the ISRC
-- exists. Confirm whether that is intended.
WITH
    releases AS (
        SELECT *, mbz.fts_release($1, $2) AS rel_score
        FROM mbz.release
        WHERE
            ($2 IS NULL AND rel_score IS NOT NULL AND rel_score > 2)
            OR ($2 IS NOT NULL AND release.gid = $2)
    )
SELECT
    track.name          AS track_name,
    track.gid           AS track_gid,
    releases.gid        AS release_gid,
    releases.name       AS release_name,
    recording.gid       AS recording_gid,
    release_group.gid   AS release_grp_gid,
    release_group.name  AS release_grp_name,
    artist_credit.name  AS artists,
    rel_score,
    10                  AS rec_score,
    10                  AS artist_score
FROM mbz.track
INNER JOIN mbz.isrc ON isrc.recording = track.recording
INNER JOIN mbz.recording ON recording.id = isrc.recording
INNER JOIN mbz.medium ON medium.id = track.medium
INNER JOIN releases ON releases.id = medium.release
INNER JOIN mbz.release_group ON release_group.id = releases.release_group
LEFT JOIN mbz.artist_credit ON artist_credit.id = recording.artist_credit
WHERE is_data_track = false
  AND isrc.isrc = $3
-- Best candidates first.
ORDER BY
    rel_score DESC,
    rec_score DESC,
    artist_score DESC
LIMIT 10;
+27
src/mbz/find_by_mbid_all.sql
-- Exact lookup of a track when BOTH MusicBrainz ids are known.
--
-- Parameters:
--   $1  recording MBID
--   $2  release MBID
--
-- No text matching happens here, so all three scores are the constant 10.
-- Result column order is significant: callers read the columns by position.
SELECT
    track.name          AS track_name,
    track.gid           AS track_gid,
    release.gid         AS release_gid,
    release.name        AS release_name,
    recording.gid       AS recording_gid,
    release_group.gid   AS release_grp_gid,
    release_group.name  AS release_grp_name,
    artist_credit.name  AS artists,
    10                  AS rel_score,
    10                  AS rec_score,
    10                  AS artist_score
FROM mbz.track
INNER JOIN mbz.recording ON recording.id = track.recording
INNER JOIN mbz.medium ON medium.id = track.medium
INNER JOIN mbz.release ON release.id = medium.release
INNER JOIN mbz.release_group ON release_group.id = release.release_group
LEFT JOIN mbz.artist_credit ON artist_credit.id = recording.artist_credit
WHERE is_data_track = false
  AND recording.gid = $1
  AND release.gid = $2
ORDER BY
    rel_score DESC,
    rec_score DESC,
    artist_score DESC
LIMIT 10;
+7 -1
src/mbz/init_fts.sql
··· 10 10 create macro if not exists mbz.fts_recording (term, gid) AS CASE 11 11 WHEN gid IS NOT NULL THEN 10 12 12 ELSE fts_mbz_recording.match_bm25 (recording.id, term) * if (lower(recording.name) = lower(term), 2.5, 1) 13 - END; 13 + END; 14 + 15 + -- like the above but `trig` takes the release/recording mbid/isrc to disable the query 16 + create macro if not exists mbz.fts_artists(term, trig) AS CASE 17 + WHEN trig IS NOT NULL THEN 10 18 + ELSE fts_mbz_artist_credit.match_bm25(artist_credit.id, term) * if (lower(artist_credit.name) = lower(term), 2.5, 1) 19 + END;
+4 -75
src/mbz/mod.rs
··· 1 1 use duckdb::{Connection, params}; 2 2 3 + mod query; 3 4 mod replica; 4 5 5 6 pub use replica::ReplicationAgent; 6 - 7 - #[derive(Debug, Default)] 8 - pub struct FindMbzData<'a> { 9 - pub release_name: Option<&'a str>, 10 - pub release_mbid: Option<&'a str>, 11 - pub release_discrim: Option<&'a str>, 12 - pub recording_mbid: Option<&'a str>, 13 - pub artist_name: Option<&'a str>, 14 - pub track_discrim: Option<&'a str>, 15 - } 16 - 17 - #[derive(Debug)] 18 - pub struct MbzQueryRes { 19 - pub track: String, 20 - pub track_gid: String, 21 - pub release_gid: Option<String>, 22 - pub release: Option<String>, 23 - pub recording_gid: Option<String>, 24 - pub release_group_gid: Option<String>, 25 - pub release_group: Option<String>, 26 - pub artists: Option<String>, 27 - pub debug: String, 28 - } 29 - 30 - #[derive(Debug)] 31 - struct MbzDebug { 32 - release: f64, 33 - recording: f64, 34 - artist: f64, 35 - } 36 - 37 - impl std::fmt::Display for MbzDebug { 38 - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 39 - write!( 40 - f, 41 - "release={:.3};recording={:.3};artist={:.3}", 42 - self.release, self.recording, self.artist 43 - ) 44 - } 45 - } 7 + pub use query::{FindMbzData, MbzQueryRes}; 46 8 47 9 pub fn try_find_mbz_data( 48 10 conn: &Connection, 49 - track_name: &str, 50 11 opts: &FindMbzData, 51 12 ) -> duckdb::Result<Option<MbzQueryRes>> { 52 - let mut prep = conn.prepare_cached(include_str!("query.sql"))?; 53 - 54 - let mut rows = prep 55 - .query_map( 56 - params![ 57 - opts.release_name, 58 - opts.release_mbid, 59 - track_name, 60 - opts.recording_mbid, 61 - opts.artist_name, 62 - ], 63 - |row| { 64 - let debug = MbzDebug { 65 - release: row.get(8)?, 66 - recording: row.get(9)?, 67 - artist: row.get(10)?, 68 - }; 69 - 70 - Ok(MbzQueryRes { 71 - track: row.get(0)?, 72 - track_gid: row.get(1)?, 73 - release_gid: row.get(2)?, 74 - release: row.get(3)?, 75 - recording_gid: row.get(4)?, 76 - release_group_gid: row.get(5)?, 77 - 
release_group: row.get(6)?, 78 - artists: row.get(7)?, 79 - debug: debug.to_string(), 80 - }) 81 - }, 82 - )? 83 - .collect::<Result<Vec<_>, _>>()?; 84 - 13 + let mut rows = query::find_mbz_data(conn, opts)?; 85 14 if !rows.is_empty() { 86 15 let row = rows.pop(); 87 16 return Ok(row); 88 17 } 89 18 90 - tracing::debug!("couldn't find '{track_name}' // {:?}", opts); 19 + tracing::debug!("couldn't find {:?}", opts); 91 20 92 21 Ok(None) 93 22 }
+145
src/mbz/query.rs
··· 1 + use duckdb::{Connection, params}; 2 + use std::time::Instant; 3 + 4 + #[derive(Debug, Default)] 5 + pub struct FindMbzData<'a> { 6 + pub track_name: &'a str, 7 + pub release_name: Option<&'a str>, 8 + pub release_mbid: Option<&'a str>, 9 + pub release_discrim: Option<&'a str>, 10 + pub recording_mbid: Option<&'a str>, 11 + pub isrc: Option<&'a str>, 12 + pub artist_name: Option<&'a str>, 13 + pub track_discrim: Option<&'a str>, 14 + } 15 + 16 + #[derive(Debug, Clone)] 17 + pub struct MbzQueryRes { 18 + pub track: String, 19 + pub track_gid: String, 20 + pub release_gid: Option<String>, 21 + pub release: Option<String>, 22 + pub recording_gid: Option<String>, 23 + pub release_group_gid: Option<String>, 24 + pub release_group: Option<String>, 25 + pub artists: Option<String>, 26 + pub debug: String, 27 + } 28 + 29 + impl MbzQueryRes { 30 + fn from_row(row: &duckdb::Row, start: Option<Instant>) -> duckdb::Result<Self> { 31 + let debug = MbzDebug::from_row(row, start)?; 32 + 33 + Ok(MbzQueryRes { 34 + track: row.get(0)?, 35 + track_gid: row.get(1)?, 36 + release_gid: row.get(2)?, 37 + release: row.get(3)?, 38 + recording_gid: row.get(4)?, 39 + release_group_gid: row.get(5)?, 40 + release_group: row.get(6)?, 41 + artists: row.get(7)?, 42 + debug: debug.to_string(), 43 + }) 44 + } 45 + } 46 + 47 + #[derive(Debug)] 48 + struct MbzDebug { 49 + release: f64, 50 + recording: f64, 51 + artist: f64, 52 + took: f32, 53 + } 54 + 55 + impl MbzDebug { 56 + pub fn from_row(row: &duckdb::Row, start: Option<Instant>) -> duckdb::Result<Self> { 57 + Ok(MbzDebug { 58 + release: row.get(8)?, 59 + recording: row.get(9)?, 60 + artist: row.get(10)?, 61 + took: start.map(|v| v.elapsed().as_secs_f32()).unwrap_or_default(), 62 + }) 63 + } 64 + } 65 + 66 + impl std::fmt::Display for MbzDebug { 67 + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 68 + write!( 69 + f, 70 + "release={:.3};recording={:.3};artist={:.3};time={:.3}", 71 + self.release, self.recording, 
self.artist, self.took, 72 + ) 73 + } 74 + } 75 + 76 + pub fn find_mbz_data(conn: &Connection, query: &FindMbzData) -> duckdb::Result<Vec<MbzQueryRes>> { 77 + let start = Instant::now(); 78 + 79 + if let Some(isrc) = query.isrc { 80 + find_by_isrc(conn, start, isrc, query.release_name, query.release_mbid) 81 + } else if let Some((rel, rec)) = query.release_mbid.zip(query.recording_mbid) { 82 + find_by_mbid_all(conn, start, rec, rel) 83 + } /*else if query.release_mbid.is_some() || query.recording_mbid.is_some() { 84 + find_by_mbid(conn, start) 85 + }*/ else { 86 + find_by_fts(conn, start, query) 87 + } 88 + } 89 + 90 + // when recording xor release mbids are set 91 + fn find_by_mbid(conn: &Connection, ts: Instant) -> duckdb::Result<Vec<MbzQueryRes>> { 92 + Ok(vec![]) 93 + } 94 + 95 + // when recording AND release mbids are set 96 + fn find_by_mbid_all( 97 + conn: &Connection, 98 + ts: Instant, 99 + recording: &str, 100 + release: &str, 101 + ) -> duckdb::Result<Vec<MbzQueryRes>> { 102 + let mut stmt = conn.prepare_cached(include_str!("find_by_mbid_all.sql"))?; 103 + 104 + stmt.query_map(params![recording, release], |row| { 105 + MbzQueryRes::from_row(row, Some(ts)) 106 + })? 107 + .collect() 108 + } 109 + 110 + // when isrc is set. release may or may not be set 111 + fn find_by_isrc( 112 + conn: &Connection, 113 + ts: Instant, 114 + isrc: &str, 115 + release_name: Option<&str>, 116 + release_mbid: Option<&str>, 117 + ) -> duckdb::Result<Vec<MbzQueryRes>> { 118 + let mut stmt = conn.prepare_cached(include_str!("find_by_isrc.sql"))?; 119 + 120 + stmt.query_map(params![release_name, release_mbid, isrc], |row| { 121 + MbzQueryRes::from_row(row, Some(ts)) 122 + })? 123 + .collect() 124 + } 125 + 126 + // when there are only names (this is absolutely worst case!) 
127 + fn find_by_fts( 128 + conn: &Connection, 129 + ts: Instant, 130 + query: &FindMbzData, 131 + ) -> duckdb::Result<Vec<MbzQueryRes>> { 132 + let mut stmt = conn.prepare_cached(include_str!("find_by_fts.sql"))?; 133 + 134 + stmt.query_map( 135 + params![ 136 + query.release_name, 137 + query.release_mbid, 138 + query.track_name, 139 + query.recording_mbid, 140 + query.artist_name, 141 + ], 142 + |row| MbzQueryRes::from_row(row, Some(ts)), 143 + )? 144 + .collect() 145 + }
-23
src/mbz/query.sql
··· 1 - with releases as (select *, rel_score: mbz.fts_release($1, $2) 2 - from mbz.release 3 - where (rel_score is not null and rel_score > 5 and $2 IS NULL) 4 - or ($2 IS NOT NULL and release.gid = $2)), 5 - recordings as (select *, rec_score: mbz.fts_recording($3, $4) 6 - from mbz.recording 7 - where (rec_score is not null and rec_score > 5 and $4 IS NULL) 8 - or ($4 IS NOT NULL and recording.gid = $4)), 9 - artists as (select *, artist_score: flashback.fts_mbz_artist_credit.match_bm25(artist_credit.id, $5), 10 - from mbz.artist_credit 11 - where artist_score is not null 12 - and artist_score > 2.5) 13 - select track_name: track.name, track_gid: track.gid, release_gid: releases.gid, release_name: releases.name, recording_gid: recordings.gid, release_grp_gid: release_group.gid, release_grp_name: release_group.name, artists: artists.name, rel_score, 14 - rec_score, 15 - coalesce(artist_score, 0) 16 - from mbz.track 17 - inner join recordings on recordings.id = track.recording 18 - inner join mbz.medium on medium.id = track.medium 19 - inner join releases on releases.id = medium.release 20 - inner join mbz.release_group on release_group.id = releases.release_group 21 - left join artists on artists.id = recordings.artist_credit 22 - where is_data_track = false 23 - order by rel_score desc, rec_score desc, artist_score desc limit 10;