A local-first private AI assistant for everyday use. Runs on-device models with encrypted P2P sync, and supports sharing chats publicly on ATProto.
10
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix: Fixed multi-turn issue with harmony models

- It was due to improper handling of assistant messages: their content can be a list of parts rather than a single string

madclaws 6153dea9 cd225dff

+34 -19
-2
server/api.py
··· 106 106 except Exception as e: 107 107 print(e) 108 108 109 - print(f"REQUEST => {request}") 110 - 111 109 if request.stream: 112 110 return StreamingResponse( 113 111 runtime.backend.generate_response_chat_stream(request),
+9 -2
server/backend/mlx.py
··· 723 723 ) 724 724 ] 725 725 for item in convos: 726 - print(f"ITEM {item}") 726 + # print(f"ITEM {item}") 727 727 match item: 728 728 case CUserMessageItemParam(): 729 729 content = "" ··· 744 744 ) 745 745 ) 746 746 case CAssistantMessageItemParam(): 747 + content = "" 748 + if isinstance(item.content, list): 749 + content = item.content[0].text 750 + else: 751 + content = item.content.root 752 + 747 753 convo_list.append( 748 754 Message.from_role_and_content( 749 - Role.ASSISTANT, item.content.root 755 + Role.ASSISTANT, content 750 756 ) # pyright: ignore 751 757 ) 752 758 case CSystemMessageItemParam(): ··· 761 767 762 768 763 769 def is_harmony_family(model_name: str): 770 + 764 771 return ReasoningExtractor.detect_model_type(model_name) == "gpt-oss"
+25 -15
tiles/src/runtime/mlx.rs
··· 13 13 use anyhow::{Context, Result, anyhow}; 14 14 use atrium_api::types::string::Datetime; 15 15 use log::info; 16 + use owo_colors::OwoColorize; 16 17 use reqwest::{Client, StatusCode}; 17 18 use rustyline::completion::Completer; 18 19 use rustyline::highlight::Highlighter; ··· 383 384 Ok(()) 384 385 } 385 386 387 + #[allow(unused_assignments)] 386 388 async fn start_repl( 387 389 mlx_runtime: &MLXRuntime, 388 390 modelfile: &Modelfile, ··· 430 432 // called `Result::unwrap()` on an `Err` value: Os { code: 32, kind: BrokenPipe, message: "Broken pipe" } 431 433 // 432 434 // User pressed Ctrl+C or Ctrl+D 433 - let end_payload = json!({ 434 - "type": "abort", 435 - }); 436 - let payload_str = format!("{}\n", serde_json::to_string(&end_payload)?); 437 - pi_stdin.write_all(payload_str.as_bytes())?; 438 - pi_stdin.flush()?; 435 + // let end_payload = json!({ 436 + // "type": "abort", 437 + // }); 438 + // let payload_str = format!("{}\n", serde_json::to_string(&end_payload)?); 439 + // pi_stdin.write_all(payload_str.as_bytes())?; 440 + // pi_stdin.flush()?; 439 441 println!("Exiting interactive mode"); 440 442 if !cfg!(debug_assertions) { 441 443 let _res = mlx_runtime.stop_server_daemon().await; ··· 450 452 match handle_input(&input) { 451 453 InputType::Skip => continue, 452 454 InputType::Exit => { 453 - let end_payload = json!({ 454 - "type": "abort", 455 - }); 456 - let payload_str = format!("{}\n", serde_json::to_string(&end_payload)?); 457 - pi_stdin.write_all(payload_str.as_bytes())?; 458 - pi_stdin.flush()?; 455 + // let end_payload = json!({ 456 + // "type": "abort", 457 + // }); 458 + // let payload_str = format!("{}\n", serde_json::to_string(&end_payload)?); 459 + // pi_stdin.write_all(payload_str.as_bytes())?; 460 + // pi_stdin.flush()?; 459 461 println!("Exiting interactive mode"); 460 462 if !cfg!(debug_assertions) { 461 463 let _res = mlx_runtime.stop_server_daemon().await; ··· 515 517 516 518 let reader = BufReader::new(&mut stdout); 517 519 let 
mut last_chat_id: String = "".to_owned(); 520 + let mut has_answer_start = false; 518 521 for line in reader.lines() { 519 522 //TODO: handle the unwrap 520 523 let line = line?; ··· 526 529 if msg_update.assistant_message_event.r#type == "text_delta" 527 530 && msg_update.assistant_message_event.delta.is_some() 528 531 { 529 - // TODO: Can we remove the unwrap 530 - print!("{}", msg_update.assistant_message_event.delta.unwrap()); 531 - // TODO: maybe can optimize check print! doc 532 + let delta = msg_update.assistant_message_event.delta.unwrap(); 533 + if delta.contains("**[Answer]**") { 534 + has_answer_start = true; 535 + } 536 + if has_answer_start { 537 + print!("{}", delta); 538 + } else { 539 + print!("{}", delta.dimmed()); 540 + } 532 541 use std::io::Write; 533 542 std::io::stdout().flush().ok(); 534 543 } 535 544 } 536 545 PiResponse::AgentEnd => { 546 + has_answer_start = false; 537 547 break; 538 548 } 539 549 PiResponse::TurnEnd(turn_event) => {