···254254 if hasattr(self.tokenizer, "name_or_path"):
255255 name_or_path = str(getattr(self.tokenizer, "name_or_path", "")).lower()
256256 model_type = ReasoningExtractor.detect_model_type(name_or_path)
257257-258257 if model_type:
259258 # This is a reasoning model
260259 self._is_reasoning_model = True
+33-13
tiles/src/runtime/mlx.rs
···391391 loop {
392392 if remaining_count > 0 {
393393 let chat_start = remaining_count == run_args.relay_count;
394394- if let Ok(response) =
395395- chat(&input, modelname, chat_start, &python_code, &g_reply).await
394394+ if let Ok(response) = chat(
395395+ &input,
396396+ modelname,
397397+ chat_start,
398398+ &python_code,
399399+ &g_reply,
400400+ run_args,
401401+ )
402402+ .await
396403 {
397404 if response.reply.is_empty() {
398405 if !response.code.is_empty() {
···401408 remaining_count -= 1;
402409 } else {
403410 g_reply = response.reply.clone();
404404- println!("\n{}", response.reply.trim());
411411+ if run_args.memory {
412412+ println!("\n{}", response.reply.trim());
413413+ } else {
414414+ println!("\n");
415415+ }
405416 break;
406417 }
407418 } else {
···473484 chat_start: bool,
474485 python_code: &str,
475486 g_reply: &str,
487487+ run_args: &RunArgs,
476488) -> Result<ChatResponse, String> {
477489 let client = Client::new();
478490···493505 let mut stream = res.bytes_stream();
494506 let mut accumulated = String::new();
495507 println!();
508508+ let mut is_answer_start = false;
496509 while let Some(chunk) = stream.next().await {
497510 let chunk = chunk.unwrap();
498511 let s = String::from_utf8_lossy(&chunk);
···504517 let data = line.trim_start_matches("data: ");
505518506519 if data == "[DONE]" {
507507- return Ok(convert_to_chat_response(&accumulated));
520520+ return Ok(convert_to_chat_response(&accumulated, run_args.memory));
508521 }
509522 // Parse JSON
510523 let v: Value = serde_json::from_str(data).unwrap();
511524 if let Some(delta) = v["choices"][0]["delta"]["content"].as_str() {
512525 accumulated.push_str(delta);
513513- print!("{}", delta.dimmed());
526526+ if !run_args.memory && delta.contains("**[Answer]**") {
527527+ is_answer_start = true;
528528+ }
529529+ if !is_answer_start {
530530+ print!("{}", delta.dimmed());
531531+ } else {
532532+ print!("{}", delta);
533533+ }
514534 use std::io::Write;
515535 std::io::stdout().flush().ok();
516536 }
···519539 Err(String::from("request failed"))
520540}
521541522522-fn convert_to_chat_response(content: &str) -> ChatResponse {
542542+fn convert_to_chat_response(content: &str, memory_mode: bool) -> ChatResponse {
523543 ChatResponse {
524524- reply: extract_reply(content),
544544+ reply: extract_reply(content, memory_mode),
525545 code: extract_python(content),
526546 }
527547}
528548529529-fn extract_reply(content: &str) -> String {
530530- if content.contains("<reply>") && content.contains("</reply>") {
549549+fn extract_reply(content: &str, memory_mode: bool) -> String {
550550+ if !memory_mode && content.contains("**[Answer]**") {
551551+ let list_a = content.split("**[Answer]**").collect::<Vec<&str>>();
552552+ list_a[1].to_owned()
553553+ } else if content.contains("<reply>") && content.contains("</reply>") {
531554 let list_a = content.split("<reply>").collect::<Vec<&str>>();
532555 let list_b = list_a[1].split("</reply>").collect::<Vec<&str>>();
533556 list_b[0].to_owned()
···561584}
562585563586fn get_default_modelfile(memory_mode: bool) -> Result<PathBuf> {
564564- // get default by the args -m
565565- // let path =
566587 if memory_mode {
567588 let path = get_lib_dir()?.join("modelfiles/mem-agent");
568589 Ok(path)
569590 } else {
570570- // let path = get_lib_dir()?.join("modelfiles/gpt-oss");
571571- let path = get_lib_dir()?.join("modelfiles/mem-agent");
591591+ let path = get_lib_dir()?.join("modelfiles/gpt-oss");
572592 Ok(path)
573593 }
574594}