Merge pull request #79 from tilesprivacy/harmony-support

+33 -14

2 changed files

expand all

server

backend

mlx_runner.py

tiles

src

runtime

mlx.rs

-1

server/backend/mlx_runner.py

··· 254 254 if hasattr(self.tokenizer, "name_or_path"): 255 255 name_or_path = str(getattr(self.tokenizer, "name_or_path", "")).lower() 256 256 model_type = ReasoningExtractor.detect_model_type(name_or_path) 257 - 258 257 if model_type: 259 258 # This is a reasoning model 260 259 self._is_reasoning_model = True

+33 -13

tiles/src/runtime/mlx.rs

··· 391 391 loop { 392 392 if remaining_count > 0 { 393 393 let chat_start = remaining_count == run_args.relay_count; 394 - if let Ok(response) = 395 - chat(&input, modelname, chat_start, &python_code, &g_reply).await 394 + if let Ok(response) = chat( 395 + &input, 396 + modelname, 397 + chat_start, 398 + &python_code, 399 + &g_reply, 400 + run_args, 401 + ) 402 + .await 396 403 { 397 404 if response.reply.is_empty() { 398 405 if !response.code.is_empty() { ··· 401 408 remaining_count -= 1; 402 409 } else { 403 410 g_reply = response.reply.clone(); 404 - println!("\n{}", response.reply.trim()); 411 + if run_args.memory { 412 + println!("\n{}", response.reply.trim()); 413 + } else { 414 + println!("\n"); 415 + } 405 416 break; 406 417 } 407 418 } else { ··· 473 484 chat_start: bool, 474 485 python_code: &str, 475 486 g_reply: &str, 487 + run_args: &RunArgs, 476 488 ) -> Result<ChatResponse, String> { 477 489 let client = Client::new(); 478 490 ··· 493 505 let mut stream = res.bytes_stream(); 494 506 let mut accumulated = String::new(); 495 507 println!(); 508 + let mut is_answer_start = false; 496 509 while let Some(chunk) = stream.next().await { 497 510 let chunk = chunk.unwrap(); 498 511 let s = String::from_utf8_lossy(&chunk); ··· 504 517 let data = line.trim_start_matches("data: "); 505 518 506 519 if data == "[DONE]" { 507 - return Ok(convert_to_chat_response(&accumulated)); 520 + return Ok(convert_to_chat_response(&accumulated, run_args.memory)); 508 521 } 509 522 // Parse JSON 510 523 let v: Value = serde_json::from_str(data).unwrap(); 511 524 if let Some(delta) = v["choices"][0]["delta"]["content"].as_str() { 512 525 accumulated.push_str(delta); 513 - print!("{}", delta.dimmed()); 526 + if !run_args.memory && delta.contains("**[Answer]**") { 527 + is_answer_start = true; 528 + } 529 + if !is_answer_start { 530 + print!("{}", delta.dimmed()); 531 + } else { 532 + print!("{}", delta); 533 + } 514 534 use std::io::Write; 515 535 std::io::stdout().flush().ok(); 516 536 } ··· 519 539 Err(String::from("request failed")) 520 540 } 521 541 522 - fn convert_to_chat_response(content: &str) -> ChatResponse { 542 + fn convert_to_chat_response(content: &str, memory_mode: bool) -> ChatResponse { 523 543 ChatResponse { 524 - reply: extract_reply(content), 544 + reply: extract_reply(content, memory_mode), 525 545 code: extract_python(content), 526 546 } 527 547 } 528 548 529 - fn extract_reply(content: &str) -> String { 530 - if content.contains("<reply>") && content.contains("</reply>") { 549 + fn extract_reply(content: &str, memory_mode: bool) -> String { 550 + if !memory_mode && content.contains("**[Answer]**") { 551 + let list_a = content.split("**[Answer]**").collect::<Vec<&str>>(); 552 + list_a[1].to_owned() 553 + } else if content.contains("<reply>") && content.contains("</reply>") { 531 554 let list_a = content.split("<reply>").collect::<Vec<&str>>(); 532 555 let list_b = list_a[1].split("</reply>").collect::<Vec<&str>>(); 533 556 list_b[0].to_owned() ··· 561 584 } 562 585 563 586 fn get_default_modelfile(memory_mode: bool) -> Result<PathBuf> { 564 - // get default by the args -m 565 - // let path = 566 587 if memory_mode { 567 588 let path = get_lib_dir()?.join("modelfiles/mem-agent"); 568 589 Ok(path) 569 590 } else { 570 - // let path = get_lib_dir()?.join("modelfiles/gpt-oss"); 571 - let path = get_lib_dir()?.join("modelfiles/mem-agent"); 591 + let path = get_lib_dir()?.join("modelfiles/gpt-oss"); 572 592 Ok(path) 573 593 } 574 594 }

Configure Feed

Configure Feed