WIP push-to-talk Letta chat frontend
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

add proof-of-concept STT

+610 -23
+336 -6
src-tauri/Cargo.lock
··· 545 545 546 546 [[package]] 547 547 name = "core-foundation" 548 + version = "0.9.4" 549 + source = "registry+https://github.com/rust-lang/crates.io-index" 550 + checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" 551 + dependencies = [ 552 + "core-foundation-sys", 553 + "libc", 554 + ] 555 + 556 + [[package]] 557 + name = "core-foundation" 548 558 version = "0.10.1" 549 559 source = "registry+https://github.com/rust-lang/crates.io-index" 550 560 checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" ··· 566 576 checksum = "fa95a34622365fa5bbf40b20b75dba8dfa8c94c734aea8ac9a5ca38af14316f1" 567 577 dependencies = [ 568 578 "bitflags 2.9.3", 569 - "core-foundation", 579 + "core-foundation 0.10.1", 570 580 "core-graphics-types", 571 - "foreign-types", 581 + "foreign-types 0.5.0", 572 582 "libc", 573 583 ] 574 584 ··· 579 589 checksum = "3d44a101f213f6c4cdc1853d4b78aef6db6bdfa3468798cc1d9912f4735013eb" 580 590 dependencies = [ 581 591 "bitflags 2.9.3", 582 - "core-foundation", 592 + "core-foundation 0.10.1", 583 593 "libc", 584 594 ] 585 595 ··· 739 749 ] 740 750 741 751 [[package]] 752 + name = "dasp" 753 + version = "0.11.0" 754 + source = "registry+https://github.com/rust-lang/crates.io-index" 755 + checksum = "7381b67da416b639690ac77c73b86a7b5e64a29e31d1f75fb3b1102301ef355a" 756 + dependencies = [ 757 + "dasp_envelope", 758 + "dasp_frame", 759 + "dasp_interpolate", 760 + "dasp_peak", 761 + "dasp_ring_buffer", 762 + "dasp_rms", 763 + "dasp_sample", 764 + "dasp_signal", 765 + "dasp_slice", 766 + "dasp_window", 767 + ] 768 + 769 + [[package]] 770 + name = "dasp_envelope" 771 + version = "0.11.0" 772 + source = "registry+https://github.com/rust-lang/crates.io-index" 773 + checksum = "8ec617ce7016f101a87fe85ed44180839744265fae73bb4aa43e7ece1b7668b6" 774 + dependencies = [ 775 + "dasp_frame", 776 + "dasp_peak", 777 + "dasp_ring_buffer", 778 + "dasp_rms", 779 + "dasp_sample", 780 + ] 781 + 782 + [[package]] 783 + name = 
"dasp_frame" 784 + version = "0.11.0" 785 + source = "registry+https://github.com/rust-lang/crates.io-index" 786 + checksum = "b2a3937f5fe2135702897535c8d4a5553f8b116f76c1529088797f2eee7c5cd6" 787 + dependencies = [ 788 + "dasp_sample", 789 + ] 790 + 791 + [[package]] 792 + name = "dasp_interpolate" 793 + version = "0.11.0" 794 + source = "registry+https://github.com/rust-lang/crates.io-index" 795 + checksum = "7fc975a6563bb7ca7ec0a6c784ead49983a21c24835b0bc96eea11ee407c7486" 796 + dependencies = [ 797 + "dasp_frame", 798 + "dasp_ring_buffer", 799 + "dasp_sample", 800 + ] 801 + 802 + [[package]] 803 + name = "dasp_peak" 804 + version = "0.11.0" 805 + source = "registry+https://github.com/rust-lang/crates.io-index" 806 + checksum = "5cf88559d79c21f3d8523d91250c397f9a15b5fc72fbb3f87fdb0a37b79915bf" 807 + dependencies = [ 808 + "dasp_frame", 809 + "dasp_sample", 810 + ] 811 + 812 + [[package]] 813 + name = "dasp_ring_buffer" 814 + version = "0.11.0" 815 + source = "registry+https://github.com/rust-lang/crates.io-index" 816 + checksum = "07d79e19b89618a543c4adec9c5a347fe378a19041699b3278e616e387511ea1" 817 + 818 + [[package]] 819 + name = "dasp_rms" 820 + version = "0.11.0" 821 + source = "registry+https://github.com/rust-lang/crates.io-index" 822 + checksum = "a6c5dcb30b7e5014486e2822537ea2beae50b19722ffe2ed7549ab03774575aa" 823 + dependencies = [ 824 + "dasp_frame", 825 + "dasp_ring_buffer", 826 + "dasp_sample", 827 + ] 828 + 829 + [[package]] 742 830 name = "dasp_sample" 743 831 version = "0.11.0" 744 832 source = "registry+https://github.com/rust-lang/crates.io-index" 745 833 checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f" 834 + 835 + [[package]] 836 + name = "dasp_signal" 837 + version = "0.11.0" 838 + source = "registry+https://github.com/rust-lang/crates.io-index" 839 + checksum = "aa1ab7d01689c6ed4eae3d38fe1cea08cba761573fbd2d592528d55b421077e7" 840 + dependencies = [ 841 + "dasp_envelope", 842 + "dasp_frame", 843 + 
"dasp_interpolate", 844 + "dasp_peak", 845 + "dasp_ring_buffer", 846 + "dasp_rms", 847 + "dasp_sample", 848 + "dasp_window", 849 + ] 850 + 851 + [[package]] 852 + name = "dasp_slice" 853 + version = "0.11.0" 854 + source = "registry+https://github.com/rust-lang/crates.io-index" 855 + checksum = "4e1c7335d58e7baedafa516cb361360ff38d6f4d3f9d9d5ee2a2fc8e27178fa1" 856 + dependencies = [ 857 + "dasp_frame", 858 + "dasp_sample", 859 + ] 860 + 861 + [[package]] 862 + name = "dasp_window" 863 + version = "0.11.1" 864 + source = "registry+https://github.com/rust-lang/crates.io-index" 865 + checksum = "99ded7b88821d2ce4e8b842c9f1c86ac911891ab89443cc1de750cae764c5076" 866 + dependencies = [ 867 + "dasp_sample", 868 + ] 869 + 870 + [[package]] 871 + name = "data-encoding" 872 + version = "2.9.0" 873 + source = "registry+https://github.com/rust-lang/crates.io-index" 874 + checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" 746 875 747 876 [[package]] 748 877 name = "deranged" ··· 1021 1150 1022 1151 [[package]] 1023 1152 name = "foreign-types" 1153 + version = "0.3.2" 1154 + source = "registry+https://github.com/rust-lang/crates.io-index" 1155 + checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" 1156 + dependencies = [ 1157 + "foreign-types-shared 0.1.1", 1158 + ] 1159 + 1160 + [[package]] 1161 + name = "foreign-types" 1024 1162 version = "0.5.0" 1025 1163 source = "registry+https://github.com/rust-lang/crates.io-index" 1026 1164 checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" 1027 1165 dependencies = [ 1028 1166 "foreign-types-macros", 1029 - "foreign-types-shared", 1167 + "foreign-types-shared 0.3.1", 1030 1168 ] 1031 1169 1032 1170 [[package]] ··· 1039 1177 "quote", 1040 1178 "syn 2.0.106", 1041 1179 ] 1180 + 1181 + [[package]] 1182 + name = "foreign-types-shared" 1183 + version = "0.1.1" 1184 + source = "registry+https://github.com/rust-lang/crates.io-index" 1185 + checksum = 
"00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" 1042 1186 1043 1187 [[package]] 1044 1188 name = "foreign-types-shared" ··· 2096 2240 version = "0.1.0" 2097 2241 dependencies = [ 2098 2242 "cpal", 2243 + "dasp", 2244 + "futures-util", 2099 2245 "serde", 2100 2246 "serde_json", 2101 2247 "tauri", ··· 2104 2250 "tauri-plugin-positioner", 2105 2251 "tauri-plugin-single-instance", 2106 2252 "tauri-plugin-window-state", 2253 + "tokio", 2254 + "tokio-tungstenite", 2255 + "tungstenite", 2107 2256 ] 2108 2257 2109 2258 [[package]] ··· 2128 2277 ] 2129 2278 2130 2279 [[package]] 2280 + name = "native-tls" 2281 + version = "0.2.14" 2282 + source = "registry+https://github.com/rust-lang/crates.io-index" 2283 + checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" 2284 + dependencies = [ 2285 + "libc", 2286 + "log", 2287 + "openssl", 2288 + "openssl-probe", 2289 + "openssl-sys", 2290 + "schannel", 2291 + "security-framework", 2292 + "security-framework-sys", 2293 + "tempfile", 2294 + ] 2295 + 2296 + [[package]] 2131 2297 name = "ndk" 2132 2298 version = "0.9.0" 2133 2299 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2532 2698 ] 2533 2699 2534 2700 [[package]] 2701 + name = "openssl" 2702 + version = "0.10.73" 2703 + source = "registry+https://github.com/rust-lang/crates.io-index" 2704 + checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" 2705 + dependencies = [ 2706 + "bitflags 2.9.3", 2707 + "cfg-if", 2708 + "foreign-types 0.3.2", 2709 + "libc", 2710 + "once_cell", 2711 + "openssl-macros", 2712 + "openssl-sys", 2713 + ] 2714 + 2715 + [[package]] 2716 + name = "openssl-macros" 2717 + version = "0.1.1" 2718 + source = "registry+https://github.com/rust-lang/crates.io-index" 2719 + checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" 2720 + dependencies = [ 2721 + "proc-macro2", 2722 + "quote", 2723 + "syn 2.0.106", 2724 + ] 2725 + 2726 + [[package]] 2727 + name = 
"openssl-probe" 2728 + version = "0.1.6" 2729 + source = "registry+https://github.com/rust-lang/crates.io-index" 2730 + checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" 2731 + 2732 + [[package]] 2733 + name = "openssl-sys" 2734 + version = "0.9.109" 2735 + source = "registry+https://github.com/rust-lang/crates.io-index" 2736 + checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" 2737 + dependencies = [ 2738 + "cc", 2739 + "libc", 2740 + "pkg-config", 2741 + "vcpkg", 2742 + ] 2743 + 2744 + [[package]] 2535 2745 name = "option-ext" 2536 2746 version = "0.2.0" 2537 2747 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2963 3173 ] 2964 3174 2965 3175 [[package]] 3176 + name = "rand" 3177 + version = "0.9.2" 3178 + source = "registry+https://github.com/rust-lang/crates.io-index" 3179 + checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" 3180 + dependencies = [ 3181 + "rand_chacha 0.9.0", 3182 + "rand_core 0.9.3", 3183 + ] 3184 + 3185 + [[package]] 2966 3186 name = "rand_chacha" 2967 3187 version = "0.2.2" 2968 3188 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2983 3203 ] 2984 3204 2985 3205 [[package]] 3206 + name = "rand_chacha" 3207 + version = "0.9.0" 3208 + source = "registry+https://github.com/rust-lang/crates.io-index" 3209 + checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" 3210 + dependencies = [ 3211 + "ppv-lite86", 3212 + "rand_core 0.9.3", 3213 + ] 3214 + 3215 + [[package]] 2986 3216 name = "rand_core" 2987 3217 version = "0.5.1" 2988 3218 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2998 3228 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 2999 3229 dependencies = [ 3000 3230 "getrandom 0.2.16", 3231 + ] 3232 + 3233 + [[package]] 3234 + name = "rand_core" 3235 + version = "0.9.3" 3236 + source = "registry+https://github.com/rust-lang/crates.io-index" 3237 
+ checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" 3238 + dependencies = [ 3239 + "getrandom 0.3.3", 3001 3240 ] 3002 3241 3003 3242 [[package]] ··· 3178 3417 ] 3179 3418 3180 3419 [[package]] 3420 + name = "schannel" 3421 + version = "0.1.27" 3422 + source = "registry+https://github.com/rust-lang/crates.io-index" 3423 + checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" 3424 + dependencies = [ 3425 + "windows-sys 0.59.0", 3426 + ] 3427 + 3428 + [[package]] 3181 3429 name = "schemars" 3182 3430 version = "0.8.22" 3183 3431 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3235 3483 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 3236 3484 3237 3485 [[package]] 3486 + name = "security-framework" 3487 + version = "2.11.1" 3488 + source = "registry+https://github.com/rust-lang/crates.io-index" 3489 + checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" 3490 + dependencies = [ 3491 + "bitflags 2.9.3", 3492 + "core-foundation 0.9.4", 3493 + "core-foundation-sys", 3494 + "libc", 3495 + "security-framework-sys", 3496 + ] 3497 + 3498 + [[package]] 3499 + name = "security-framework-sys" 3500 + version = "2.14.0" 3501 + source = "registry+https://github.com/rust-lang/crates.io-index" 3502 + checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" 3503 + dependencies = [ 3504 + "core-foundation-sys", 3505 + "libc", 3506 + ] 3507 + 3508 + [[package]] 3238 3509 name = "selectors" 3239 3510 version = "0.24.0" 3240 3511 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3421 3692 ] 3422 3693 3423 3694 [[package]] 3695 + name = "sha1" 3696 + version = "0.10.6" 3697 + source = "registry+https://github.com/rust-lang/crates.io-index" 3698 + checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" 3699 + dependencies = [ 3700 + "cfg-if", 3701 + "cpufeatures", 3702 + "digest", 3703 + ] 3704 + 3705 + 
[[package]] 3424 3706 name = "sha2" 3425 3707 version = "0.10.9" 3426 3708 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3495 3777 "bytemuck", 3496 3778 "cfg_aliases", 3497 3779 "core-graphics", 3498 - "foreign-types", 3780 + "foreign-types 0.5.0", 3499 3781 "js-sys", 3500 3782 "log", 3501 3783 "objc2 0.5.2", ··· 3651 3933 dependencies = [ 3652 3934 "bitflags 2.9.3", 3653 3935 "block2 0.6.1", 3654 - "core-foundation", 3936 + "core-foundation 0.10.1", 3655 3937 "core-graphics", 3656 3938 "crossbeam-channel", 3657 3939 "dispatch", ··· 4122 4404 ] 4123 4405 4124 4406 [[package]] 4407 + name = "tokio-native-tls" 4408 + version = "0.3.1" 4409 + source = "registry+https://github.com/rust-lang/crates.io-index" 4410 + checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" 4411 + dependencies = [ 4412 + "native-tls", 4413 + "tokio", 4414 + ] 4415 + 4416 + [[package]] 4417 + name = "tokio-tungstenite" 4418 + version = "0.27.0" 4419 + source = "registry+https://github.com/rust-lang/crates.io-index" 4420 + checksum = "489a59b6730eda1b0171fcfda8b121f4bee2b35cba8645ca35c5f7ba3eb736c1" 4421 + dependencies = [ 4422 + "futures-util", 4423 + "log", 4424 + "native-tls", 4425 + "tokio", 4426 + "tokio-native-tls", 4427 + "tungstenite", 4428 + ] 4429 + 4430 + [[package]] 4125 4431 name = "tokio-util" 4126 4432 version = "0.7.16" 4127 4433 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4334 4640 checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" 4335 4641 4336 4642 [[package]] 4643 + name = "tungstenite" 4644 + version = "0.27.0" 4645 + source = "registry+https://github.com/rust-lang/crates.io-index" 4646 + checksum = "eadc29d668c91fcc564941132e17b28a7ceb2f3ebf0b9dae3e03fd7a6748eb0d" 4647 + dependencies = [ 4648 + "bytes", 4649 + "data-encoding", 4650 + "http", 4651 + "httparse", 4652 + "log", 4653 + "native-tls", 4654 + "rand 0.9.2", 4655 + "sha1", 4656 + "thiserror 2.0.16", 4657 + "utf-8", 
4658 + ] 4659 + 4660 + [[package]] 4337 4661 name = "typeid" 4338 4662 version = "1.0.3" 4339 4663 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4456 4780 "serde", 4457 4781 "wasm-bindgen", 4458 4782 ] 4783 + 4784 + [[package]] 4785 + name = "vcpkg" 4786 + version = "0.2.15" 4787 + source = "registry+https://github.com/rust-lang/crates.io-index" 4788 + checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 4459 4789 4460 4790 [[package]] 4461 4791 name = "version-compare"
+5
src-tauri/Cargo.toml
··· 23 23 serde = { version = "1", features = ["derive"] } 24 24 serde_json = "1" 25 25 cpal = "0.16.0" 26 + tokio-tungstenite = { version = "0.27.0", features = ["native-tls"] } 27 + tungstenite = "0.27.0" 28 + futures-util = "0.3.31" 29 + tokio = "1.47.1" 30 + dasp = { version = "0.11.0", features = ["all"] } 26 31 27 32 [target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies] 28 33 tauri-plugin-positioner = "2"
+46
src-tauri/src/cartesia/client.rs
··· 1 + use std::sync::Arc; 2 + use tauri::{async_runtime::RwLock, Url}; 3 + use tokio::net::TcpStream; 4 + use tokio_tungstenite::{ 5 + connect_async, tungstenite::client::IntoClientRequest, MaybeTlsStream, WebSocketStream, 6 + }; 7 + 8 + #[derive(Clone)] 9 + pub struct CartesiaClient { 10 + api_key: Arc<RwLock<String>>, 11 + } 12 + 13 + impl CartesiaClient { 14 + pub fn new(api_key: String) -> Self { 15 + Self { 16 + api_key: Arc::new(RwLock::new(api_key)), 17 + } 18 + } 19 + 20 + pub async fn open_stt_connection(&self) -> WebSocketStream<MaybeTlsStream<TcpStream>> { 21 + let api_key = self.api_key.read().await.clone(); 22 + let mut request = Url::parse_with_params( 23 + "wss://api.cartesia.ai/stt/websocket", 24 + &[ 25 + ("model", "ink-whisper"), 26 + ("encoding", "pcm_s16le"), 27 + ("sample_rate", "16000"), 28 + ], 29 + ) 30 + .expect("failed to parse STT connection URL") 31 + .as_str() 32 + .into_client_request() 33 + .expect("failed to instantiate STT WebSocket request"); 34 + 35 + let headers = request.headers_mut(); 36 + 37 + headers.insert("X-API-Key", api_key.parse().unwrap()); 38 + headers.insert("Cartesia-Version", "2025-04-16".parse().unwrap()); 39 + 40 + let (stream, _) = connect_async(request) 41 + .await 42 + .expect("failed to open STT websocket connection"); 43 + 44 + stream 45 + } 46 + }
+13
src-tauri/src/cartesia/commands.rs
··· 1 + use crate::state::AppState; 2 + 3 + #[tauri::command] 4 + pub async fn start_stt(state: tauri::State<'_, AppState>) -> Result<(), ()> { 5 + state.stt_manager.transcribe().await; 6 + Ok(()) 7 + } 8 + 9 + #[tauri::command] 10 + pub async fn stop_stt(state: tauri::State<'_, AppState>) -> Result<(), ()> { 11 + state.stt_manager.stop_transcription().await; 12 + Ok(()) 13 + }
+3
src-tauri/src/cartesia/mod.rs
··· 1 + pub mod client; 2 + pub mod commands; 3 + pub mod stt;
+107
src-tauri/src/cartesia/stt.rs
··· 1 + use super::client::CartesiaClient; 2 + use crate::devices::input::InputDeviceManager; 3 + use dasp::Signal; 4 + use futures_util::{ 5 + future::join, 6 + stream::{SplitSink, SplitStream}, 7 + SinkExt, StreamExt, 8 + }; 9 + use std::sync::Arc; 10 + use tauri::async_runtime::Mutex; 11 + use tokio::net::TcpStream; 12 + use tokio_tungstenite::{MaybeTlsStream, WebSocketStream}; 13 + use tungstenite::{Bytes, Message}; 14 + 15 + pub struct SttManager { 16 + client: Arc<CartesiaClient>, 17 + input: Arc<InputDeviceManager>, 18 + is_transcribing: Arc<Mutex<bool>>, 19 + } 20 + 21 + impl SttManager { 22 + pub fn new(client: Arc<CartesiaClient>, input: Arc<InputDeviceManager>) -> Self { 23 + Self { 24 + client, 25 + input, 26 + is_transcribing: Arc::new(Mutex::new(false)), 27 + } 28 + } 29 + 30 + async fn handle_messages( 31 + &self, 32 + mut reader: SplitStream<WebSocketStream<MaybeTlsStream<TcpStream>>>, 33 + ) { 34 + println!("handling messages"); 35 + while let Some(message) = reader.next().await { 36 + let is_transcribing = self.is_transcribing.lock().await; 37 + 38 + if *is_transcribing { 39 + println!("message from STT: {:?}", message) 40 + } else { 41 + println!("finished transcribing; breaking message handling"); 42 + break; 43 + } 44 + } 45 + } 46 + 47 + async fn send_frames( 48 + &self, 49 + signal: impl Signal<Frame = i16>, 50 + mut writer: SplitSink<WebSocketStream<MaybeTlsStream<TcpStream>>, Message>, 51 + ) { 52 + let mut buffer = Vec::with_capacity(3200); 53 + println!("sending frames"); 54 + for frame in signal.until_exhausted() { 55 + let is_transcribing = self.is_transcribing.lock().await; 56 + 57 + if *is_transcribing { 58 + buffer.push(frame.to_le_bytes()); 59 + 60 + if buffer.len() == buffer.capacity() { 61 + println!("sending {} frames", buffer.len()); 62 + writer 63 + .send(tungstenite::Message::Binary(Bytes::from_iter( 64 + buffer.iter().flat_map(|f| *f), 65 + ))) 66 + .await 67 + .expect("failed to send binary frame message to STT"); 68 + 69 + 
buffer.clear(); 70 + } 71 + } else { 72 + println!("stopping frame send"); 73 + break; 74 + } 75 + } 76 + } 77 + 78 + /// Begins transcribing text via Cartesia. Blocks until `stop_transcription` is called 79 + pub async fn transcribe(&self) { 80 + let stream = self.client.open_stt_connection().await; 81 + let input = self.input.start_listening().await; 82 + let (writer, reader) = stream.split(); 83 + 84 + { 85 + let mut is_transcribing = self.is_transcribing.lock().await; 86 + *is_transcribing = true; 87 + } 88 + 89 + // Handle incoming messages 90 + let read = self.handle_messages(reader); 91 + let write = self.send_frames(input, writer); 92 + 93 + join(read, write).await; 94 + 95 + stream.close(); 96 + } 97 + 98 + /// Terminates the microphone signal and halts the transcription processes 99 + pub async fn stop_transcription(&self) { 100 + { 101 + let mut is_transcribing = self.is_transcribing.lock().await; 102 + *is_transcribing = false; 103 + } 104 + 105 + self.input.stop_listening().await; 106 + } 107 + }
+66 -6
src-tauri/src/devices/input.rs
··· 1 1 use crate::devices::types::AudioDeviceError; 2 - use cpal::traits::HostTrait; 3 - use cpal::{default_host, Device, Host}; 2 + use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; 3 + use cpal::{default_host, Device, SampleFormat, Stream}; 4 + use dasp::interpolate::sinc::Sinc; 5 + use dasp::ring_buffer; 6 + use dasp::signal::{self, Signal}; 7 + use dasp::Frame; 8 + use std::sync::{mpsc::channel, Arc}; 9 + use tauri::async_runtime::Mutex; 4 10 5 11 pub struct InputDeviceManager { 6 - host: Host, 7 12 device: Device, 13 + stream: Arc<Mutex<Option<Stream>>>, 14 + } 15 + 16 + fn resample( 17 + signal: impl Signal<Frame = i16>, 18 + source_hz: f64, 19 + target_hz: f64, 20 + ) -> impl Signal<Frame = i16> { 21 + let ring_buffer = ring_buffer::Fixed::from([i16::EQUILIBRIUM; 10]); 22 + let sinc = Sinc::new(ring_buffer); 23 + signal.from_hz_to_hz(sinc, source_hz, target_hz) 8 24 } 9 25 10 26 impl InputDeviceManager { ··· 13 29 let device = host 14 30 .default_input_device() 15 31 .ok_or(AudioDeviceError::NoDevicesFound)?; 16 - let manager = InputDeviceManager { host, device }; 32 + let manager = InputDeviceManager { 33 + device, 34 + stream: Arc::new(Mutex::new(None)), 35 + }; 17 36 18 37 Ok(manager) 19 38 } 20 39 21 - pub fn get_device(&self) -> &Device { 22 - &self.device 40 + pub async fn start_listening(&self) -> impl Signal<Frame = i16> { 41 + let config = self 42 + .device 43 + .supported_input_configs() 44 + .expect("failed to get supported input configs") 45 + .find(|c| c.sample_format() == SampleFormat::I16) 46 + .expect("failed to find PCM s16 config") 47 + .with_max_sample_rate(); 48 + let channels = config.channels() as usize; 49 + let (tx, rx) = channel::<Vec<i16>>(); 50 + let stream = self 51 + .device 52 + .build_input_stream( 53 + &config.config(), 54 + move |data: &[i16], _: &cpal::InputCallbackInfo| { 55 + let mono = data 56 + .chunks(channels) 57 + .map(|frame| (frame.iter().sum::<i16>() / channels as i16).to_le()); 58 + 59 + let _ = tx 60 + 
.send(mono.collect()) 61 + .inspect(|_| println!("successfully sent frame")) 62 + .inspect_err(|e| eprintln!("failed to send frame: {e}")); 63 + }, 64 + |err| eprintln!("encountered error streaming input: {}", err), 65 + None, 66 + ) 67 + .expect("failed to create input stream"); 68 + stream.play().expect("failed to start input stream"); 69 + 70 + let samples = rx.into_iter().flat_map(|x| x.into_iter()); 71 + let signal = signal::from_iter(samples); 72 + let signal = resample(signal, config.sample_rate().0 as f64, 16000.0); 73 + 74 + let mut s = self.stream.lock().await; 75 + *s = Some(stream); 76 + 77 + signal 78 + } 79 + 80 + pub async fn stop_listening(&self) { 81 + let mut s = self.stream.lock().await; 82 + *s = None; 23 83 } 24 84 }
+6 -1
src-tauri/src/lib.rs
··· 1 1 use crate::state::AppState; 2 + use tauri::async_runtime::Mutex; 2 3 use tauri::{Manager, WebviewUrl, WebviewWindowBuilder}; 3 4 use tauri_plugin_positioner::{Position, WindowExt}; 4 5 use tauri_plugin_window_state::{StateFlags, WindowExt as StateWindowExt}; 5 6 7 + mod cartesia; 6 8 mod devices; 7 9 mod state; 8 10 ··· 62 64 63 65 Ok(()) 64 66 }) 65 - .invoke_handler(tauri::generate_handler![greet]) 67 + .invoke_handler(tauri::generate_handler![ 68 + cartesia::commands::start_stt, 69 + cartesia::commands::stop_stt, 70 + ]) 66 71 .run(tauri::generate_context!()) 67 72 .expect("error while running tauri application"); 68 73 }
+20 -9
src-tauri/src/state.rs
··· 1 - use std::sync::{Arc, RwLock}; 1 + use std::sync::Arc; 2 2 3 - use crate::devices::{ 4 - input::InputDeviceManager, output::OutputDeviceManager, types::AudioDeviceError, 3 + use crate::{ 4 + cartesia::{client::CartesiaClient, stt::SttManager}, 5 + devices::{input::InputDeviceManager, output::OutputDeviceManager, types::AudioDeviceError}, 5 6 }; 6 7 7 8 pub struct AppState { 8 - pub input_device_manager: Arc<RwLock<InputDeviceManager>>, 9 - pub output_device_manager: Arc<RwLock<OutputDeviceManager>>, 9 + pub cartesia_client: Arc<CartesiaClient>, 10 + pub stt_manager: Arc<SttManager>, 11 + pub input_device_manager: Arc<InputDeviceManager>, 12 + pub output_device_manager: Arc<OutputDeviceManager>, 10 13 } 11 14 12 15 impl AppState { 13 16 pub fn new() -> Result<Self, AudioDeviceError> { 14 - let input_device_manager = InputDeviceManager::new()?; 15 - let output_device_manager = OutputDeviceManager::new()?; 17 + let input_device_manager = Arc::new(InputDeviceManager::new()?); 18 + let output_device_manager = Arc::new(OutputDeviceManager::new()?); 19 + 20 + let cartesia_client = Arc::new(CartesiaClient::new("TODO".into())); 21 + let stt_manager = Arc::new(SttManager::new( 22 + cartesia_client.clone(), 23 + input_device_manager.clone(), 24 + )); 16 25 17 26 Ok(AppState { 18 - input_device_manager: Arc::new(RwLock::new(input_device_manager)), 19 - output_device_manager: Arc::new(RwLock::new(output_device_manager)), 27 + input_device_manager, 28 + output_device_manager, 29 + cartesia_client, 30 + stt_manager, 20 31 }) 21 32 } 22 33 }
+8 -1
src/routes/+page.svelte
<script lang="ts">
  import { invoke } from "@tauri-apps/api/core"
</script>

<main>miwiwi</main>
<!-- `type="button"` (not `rel`) is the attribute that marks a plain, non-submitting button. -->
<button type="button" on:click={() => invoke("start_stt")}>Start STT</button>
<button type="button" on:click={() => invoke("stop_stt")}>Stop STT</button>