···90909191 // tool loop: keep calling the model until it produces a plain text response
9292 let mut tool_iterations = 0usize;
9393+ let mut accumulated_thinking = String::new();
9394 const MAX_TOOL_ITERATIONS: usize = 20;
9595+9696+ // emit Started once — all tokens/thinking for the whole turn go into one bubble
9797+ let _ = output.send(AgentEvent::Started);
94989599 loop {
96100 let (tok_tx, mut tok_rx) = mpsc::channel(256);
···100104 tokio::spawn(async move {
101105 let _ = llm2.stream(&msgs, &defs, tok_tx).await;
102106 });
103103- let _ = output.send(AgentEvent::Started);
104107105108 let mut response = String::new();
106109 let mut thinking = String::new();
···110113 match ev {
111114 LlmEvent::ThinkToken(tok) => {
112115 thinking.push_str(&tok);
116116+ accumulated_thinking.push_str(&tok);
113117 let _ = output.send(AgentEvent::ThinkToken(tok));
114118 }
115119 LlmEvent::Token(tok) => {
···127131128132 if !tool_calls.is_empty() && tool_iterations < MAX_TOOL_ITERATIONS {
129133 tool_iterations += 1;
130130- ctx.push_assistant_tool_calls(tool_calls.clone());
134134+ let text_content = if response.is_empty() {
135135+ None
136136+ } else {
137137+ Some(response.clone())
138138+ };
139139+ ctx.push_assistant_tool_calls(tool_calls.clone(), text_content);
131140132141 for call in &tool_calls {
133142 let name = call.function.name.clone();
···146155147156 ctx.push_tool_result(&call.id, &result);
148157 }
149149-150150- // loop back to let the model process tool results
151158 continue;
152159 }
153160154161 // plain text response (or tool limit hit) — wrap up the turn
155162 ctx.push_assistant(&response);
156156- let thinking_ref = (!thinking.is_empty()).then_some(thinking.as_str());
163163+ let thinking_ref =
164164+ (!accumulated_thinking.is_empty()).then_some(accumulated_thinking.as_str());
157165 let _ = memory.log_turn("assistant", &response, thinking_ref);
158166 turn_count += 1;
159167···190198 output: &broadcast::Sender<AgentEvent>,
191199) -> Result<()> {
192200 // run reflection before draining so the agent can curate memories
193193- let _ = output.send(AgentEvent::Status("reflecting...".into()));
194194- if let Err(e) = reflect(tool_ctx, ctx).await {
201201+ if let Err(e) = reflect(tool_ctx, ctx, output).await {
195202 tracing::warn!(err = %e, "reflection failed");
196203 }
197204···234241235242/// ephemeral reflection loop: let the agent review and curate its memories
236243/// without touching the main conversation context
237237-async fn reflect(tool_ctx: &ToolContext, ctx: &Context) -> Result<()> {
244244+async fn reflect(
245245+ tool_ctx: &ToolContext,
246246+ ctx: &Context,
247247+ output: &broadcast::Sender<AgentEvent>,
248248+) -> Result<()> {
238249 let pinned = tool_ctx.memory.pinned_memories().unwrap_or_default();
239250 let unpinned = tool_ctx.memory.recent_unpinned(20).unwrap_or_default();
240251···274285 .take(10)
275286 .filter_map(|m| {
276287 let content = m.content.as_deref()?;
277277- let snippet = if content.len() > 120 {
278278- format!("{}...", &content[..120])
279279- } else {
280280- content.to_string()
281281- };
282282- Some(format!("{}: {snippet}", m.role))
288288+ Some(format!("{}: {content}", m.role))
283289 })
284290 .collect::<Vec<_>>()
285291 .into_iter()
···301307 let reflect_registry = tools::memory_tools();
302308 let mut msgs = vec![Message::user(reflection_prompt)];
303309304304- // mini tool loop, max 6 iterations
305305- for _ in 0..6 {
306306- let (tok_tx, mut tok_rx) = mpsc::channel(128);
310310+ let _ = output.send(AgentEvent::ReflectStarted);
311311+312312+ for _ in 0..20 {
313313+ let (tok_tx, mut tok_rx) = mpsc::channel(512);
307314 let llm2 = tool_ctx.llm.clone();
308315 let msgs_snap = msgs.clone();
309316 let defs_snap = reflect_registry.definitions();
···312319 });
313320314321 let mut tool_calls = vec![];
315315- let mut response = String::new();
316322 while let Some(ev) = tok_rx.recv().await {
317323 match ev {
318324 LlmEvent::ToolCalls(calls) => tool_calls = calls,
319319- LlmEvent::Token(t) => response.push_str(&t),
325325+ LlmEvent::Token(t) => {
326326+ let _ = output.send(AgentEvent::Token(t));
327327+ }
328328+ LlmEvent::ThinkToken(t) => {
329329+ let _ = output.send(AgentEvent::ThinkToken(t));
330330+ }
320331 _ => {}
321332 }
322333 }
···325336 break;
326337 }
327338328328- msgs.push(Message::with_tool_calls(tool_calls.clone()));
339339+ msgs.push(Message::with_tool_calls(tool_calls.clone(), None));
329340 for call in &tool_calls {
341341+ let name = call.function.name.clone();
342342+ let args = call.function.arguments.clone();
343343+ let _ = output.send(AgentEvent::ToolCall {
344344+ name: name.clone(),
345345+ args: args.clone(),
346346+ });
330347 let result = reflect_registry.execute(call, tool_ctx).await;
348348+ let _ = output.send(AgentEvent::ToolResult {
349349+ name: name.clone(),
350350+ content: result.clone(),
351351+ });
331352 msgs.push(Message::tool_result(&call.id, &result));
332353 }
333354 }
334355356356+ let _ = output.send(AgentEvent::ReflectDone);
335357 Ok(())
336358}
+6-4
klbr-core/src/context.rs
···9090 self.turns.push(Message::assistant(content.to_string()));
9191 }
92929393- /// push an assistant message that contains tool calls (no text content)
9494- pub fn push_assistant_tool_calls(&mut self, calls: Vec<ToolCall>) {
9595- self.turns.push(Message::with_tool_calls(calls));
9393+ /// push an assistant message that contains tool calls, optionally with text
9494+ /// content the model emitted before deciding to call tools.
9595+ pub fn push_assistant_tool_calls(&mut self, calls: Vec<ToolCall>, content: Option<String>) {
9696+ self.turns.push(Message::with_tool_calls(calls, content));
9697 }
97989898- /// push a tool result back into the context
9999+ /// push a tool result back into the context as a separate role:"tool" message.
100100+ /// LM Studio converts these to Gemma 4's bundled tool_responses format internally.
99101 pub fn push_tool_result(&mut self, tool_call_id: &str, content: &str) {
100102 self.turns.push(Message::tool_result(tool_call_id, content));
101103 }
···290290 "SELECT m.id, m.content, m.tags, v.distance
291291 FROM vec_memories v
292292 JOIN memories m ON m.id = v.rowid
293293- WHERE v.embedding MATCH ?1
294294- ORDER BY v.distance
295295- LIMIT ?2",
293293+ WHERE v.embedding MATCH ?1 AND k = ?2
294294+ ORDER BY v.distance",
296295 )?;
297296 let rows = stmt.query_map(params![blob, k as i64], |row| {
298297 Ok((
+6-3
klbr-core/src/tools/recall.rs
···1414fn definition() -> ToolDef {
1515 ToolDef::function(
1616 "recall",
1717- "semantic search over long-term memory. \
1717+ "semantic search over long-term memory (facts stored across sessions). \
1818+ does NOT search the current conversation — use this only for things you \
1919+ might have stored previously with remember(). \
2020+ use a specific, noun-heavy query like \"user's preferred editor\" not \
2121+ \"what we just discussed\". \
1822 with no tags: searches all memories by meaning. \
1923 with tags: searches only within memories that match those tags, \
2020- ranked by semantic similarity (never misses a tag-matched memory due to global ranking). \
2121- provide at least a query.",
2424+ ranked by semantic similarity.",
2225 json!({
2326 "type": "object",
2427 "properties": {
+12-2
klbr-daemon/src/daemon.rs
···138138 }
139139 }
140140 }
141141- ev = rx.recv() => {
142142- let ev = ev?;
141141+ res = rx.recv() => {
142142+ let ev = match res {
143143+ Ok(ev) => ev,
144144+ Err(broadcast::error::RecvError::Lagged(n)) => {
145145+ tracing::warn!(n, "client lagged, events dropped");
146146+ continue;
147147+ }
148148+ Err(broadcast::error::RecvError::Closed) => break,
149149+ };
150150+143151 let msg = match ev {
144152 AgentEvent::Started => ServerMsg::Started,
145153 AgentEvent::Token(content) => ServerMsg::Token { content },
146154 AgentEvent::ThinkToken(content) => ServerMsg::ThinkToken { content },
147155 AgentEvent::Done => ServerMsg::Done,
156156+ AgentEvent::ReflectStarted => ServerMsg::ReflectStarted,
157157+ AgentEvent::ReflectDone => ServerMsg::ReflectDone,
148158 AgentEvent::Status(content) => ServerMsg::Status { content },
149159 AgentEvent::Metrics(m) => ServerMsg::Metrics {
150160 turn_count: m.turn_count,
+1-1
klbr-daemon/src/main.rs
···1515 let snapshot = Arc::new(RwLock::new(None)) as MetricsSnapshot;
16161717 let (interrupt_tx, interrupt_rx) = mpsc::channel(32);
1818- let (output_tx, _) = broadcast::channel::<AgentEvent>(256);
1818+ let (output_tx, _) = broadcast::channel::<AgentEvent>(4096);
19192020 let history_window = config.compaction_keep;
2121 let agent = tokio::spawn(agent::run(