A local-first private AI assistant for everyday use. Runs on-device models with encrypted P2P sync, and supports sharing chats publicly on ATProto.
10
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge pull request #92 from tilesprivacy/feat/harmony-renderer

Added harmony renderer support + responses API refactor

authored by

Anandu Pavanan and committed by
GitHub
49b74d6e 41511f24

+814 -293
+64
CHANGELOG.md
··· 1 + # Changelog 2 + 3 + All notable changes to this project are documented in this file. 4 + The format is based on https://keepachangelog.com/en/1.1.0/ 5 + 6 + ## [Unreleased] 7 + 8 + ## [0.4.2] - 2026-03-01 9 + ### Added 10 + - Added FTUE changes for account setup in https://github.com/tilesprivacy/tiles/pull/88 11 + - Added OTA updater in https://github.com/tilesprivacy/tiles/pull/89 12 + - Supports auto update checking and installing 13 + - Use `tiles update` for updating Tiles CLI manually 14 + 15 + ### Changed 16 + - Integrated Harmony renderer for gpt-oss model in https://github.com/tilesprivacy/tiles/pull/92 17 + 18 + ### Fixed 19 + - fix: Added path unavailability warning during installation in https://github.com/tilesprivacy/tiles/pull/90 20 + - coverage patch-1 in @https://github.com/tilesprivacy/tiles/pull/91 21 + 22 + ## [0.4.1] - 2026-02-22 23 + ### Added 24 + - Identity system for Tiles: 25 + - `tiles account` to show account details 26 + - `tiles account create <nickname>` to create root identity and optional nickname 27 + - `tiles account set-nickname` to set a nickname for root identity 28 + - Updated CLI to include default `tiles` command 29 + 30 + ## [0.4.0] - 2026-02-04 31 + ### Added 32 + - Portable Python runtime in the installer (no system Python required) 33 + - Bundled default Modelfiles and direct reading of system prompt from Modelfile 34 + - Support for `gpt-oss-20b` in interactive chat 35 + - Basic support for the Open Responses API (`/v1/responses`) and REST endpoints 36 + - Token metrics for model responses in the REPL 37 + - `-m` flag for `tiles run` to execute Tiles in memory mode (experimental) 38 + - Tilekit 0.2.0: `optimize` subcommand for automatic system-prompt optimization via DSRs 39 + 40 + ## [0.3.1] - 2026-01-09 41 + ### Added 42 + - `--relay-count` / `-r` option for `tiles run` (helps if model gets stuck) 43 + - CLI shows progress status while downloading models 44 + - Slash commands and placeholder hint in the REPL 
45 + - Ability to set custom memory location via `tiles memory set-path <PATH>` 46 + 47 + ### Changed 48 + - Minor internal refactoring 49 + 50 + ## [0.3.0] - 2026-01-06 51 + ### Fixed 52 + - Tiles binary startup issue when run from outside a project directory 53 + - Model not unloading after exiting the REPL 54 + - Updated Python version to 3.13 for development 55 + - Enabled basic Linux compatibility 56 + 57 + ### Changed 58 + - Basic refactoring to support multiple inference runtimes 59 + 60 + ## [0.2.0] - 2025-12-20 61 + ### Added 62 + - Server commands 63 + - Streaming support with “thinking tokens” in the CLI 64 + - Auto-downloading of model specified in Modelfile
+1 -1
Cargo.lock
··· 4168 4168 4169 4169 [[package]] 4170 4170 name = "tiles" 4171 - version = "0.4.1" 4171 + version = "0.4.2" 4172 4172 dependencies = [ 4173 4173 "anyhow", 4174 4174 "async-std",
-39
modelfiles/gpt-oss
··· 1 1 FROM mlx-community/gpt-oss-20b-MXFP4-Q4 2 - SYSTEM """ 3 - You are Tiles, a helpful AI assistant. You have access to a secure Python sandbox for running code and managing your memory. 4 - 5 - ## CRITICAL: Output Format 6 - Your output must be structured into three distinct channels using these exact markers: 7 - 8 - 1. **Analysis Channel**: Thinking and planning. 9 - - Start: `<|channel|>analysis<|message|>` 10 - - End: `<|end|>` 11 - 12 - 2. **Code Channel**: Python code to execute. 13 - - Start: `<|channel|>code<|message|>` 14 - - End: `<|end|>` 15 - 16 - 3. **Final Response Channel**: Your final answer to the user. 17 - - Start: `<|channel|>final<|message|>` 18 - - End: `<|end|>` 19 - 20 - **Rules**: 21 - - ALWAYS start with the Analysis channel. 22 - - If you need to run code, use the Code channel. 23 - - If no code is needed, use the Final Response channel after Analysis. 24 - - **CRITICAL: ALWAYS assign function results and calculations to variables.** 25 - ```python 26 - # CORRECT 27 - result = math.sqrt(12345) 28 - # WRONG - The result will be LOST 29 - math.sqrt(12345) 30 - ``` 31 - - NEVER mention "ChatGPT" or "OpenAI". You are Tiles. 32 - - NEVER use legacy tags like `<think>`, `<python>`, or `<reply>`. Use ONLY the channel markers above. 33 - 34 - ### Handling Results 35 - When you receive a `<result>` block, it indicates the outcome of your code. 36 - - Analyze the result in the **Analysis** channel. 37 - - If the calculation is complete, provide the final answer in the **Final Response** channel immediately. 38 - - **DO NOT** repeat the code once you have the results unless you need to fix a specific error. 39 - - **DO NOT** ask the user if you should run code; just run it if needed using the Code channel. 40 - """
+1 -1
scripts/bundler.sh
··· 16 16 17 17 cargo build -p tiles --${TARGET} 18 18 19 - rm -rf "${DIST_DIR}" 19 + # rm -rf "${DIST_DIR}" 20 20 21 21 mkdir -p "${DIST_DIR}/tmp" 22 22 cp "target/${TARGET}/${BINARY_NAME}" "${DIST_DIR}/tmp/"
+1 -1
scripts/install.sh
··· 4 4 ENV="prod" # prod is another env, try taking it from github env 5 5 REPO="tilesprivacy/tiles" 6 6 # VERSION="${TILES_VERSION:-latest}" 7 - VERSION="0.4.1" 7 + VERSION="0.4.2" 8 8 INSTALL_DIR="$HOME/.local/bin" # CLI install location 9 9 SERVER_DIR="$HOME/.local/lib/tiles/server" # Python server folder 10 10 MODELFILE_DIR="$HOME/.local/lib/tiles/modelfiles" # Python server folder
+10 -16
server/api.py
··· 1 - from fastapi import FastAPI, HTTPException 2 - 3 - from .schemas import ( 4 - ChatMessage, 5 - ChatCompletionRequest, 6 - StartRequest, 7 - downloadRequest, 8 - ResponsesRequest, 9 - ) 10 1 import logging 11 2 import sys 12 3 from typing import Optional 13 4 5 + from fastapi import FastAPI, HTTPException 14 6 from fastapi.responses import StreamingResponse 15 7 from pydantic import BaseModel, Field 16 8 9 + from . import runtime 17 10 from .hf_downloader import pull_model 18 - 11 + from .mem_agent.engine import execute_sandboxed_code 19 12 from .mem_agent.utils import ( 20 13 create_memory_if_not_exists, 21 14 format_results, 22 15 ) 23 - from .mem_agent.engine import execute_sandboxed_code 24 - 25 - from . import runtime 16 + from .schemas import ( 17 + ChatCompletionRequest, 18 + ChatMessage, 19 + ResponsesRequest, 20 + StartRequest, 21 + downloadRequest, 22 + ) 26 23 27 24 logger = logging.getLogger("app") 28 25 _current_model_path: Optional[str] = None ··· 92 89 Create a response with openResponse format 93 90 """ 94 91 95 - global _messages 96 - 97 92 if request.stream: 98 - # Streaming response 99 93 return StreamingResponse( 100 94 runtime.backend.generate_response_chat_stream(request), 101 95 media_type="text/plain",
+196 -70
server/backend/mlx.py
··· 1 - from .mlx_runner import MLXRunner 2 - from ..cache_utils import get_model_path 1 + import json 2 + import logging 3 + import time 4 + import uuid 5 + from collections.abc import AsyncGenerator 6 + 3 7 from fastapi import HTTPException 8 + from openai_harmony import ( 9 + Conversation, 10 + DeveloperContent, 11 + Message, 12 + ReasoningEffort, 13 + Role, 14 + SystemContent, 15 + ) 16 + from openresponses_types import ReasoningEffortEnum 17 + from openresponses_types.types import ( 18 + DeveloperMessageItemParam, 19 + Error, 20 + IncompleteDetails, 21 + UserMessageItemParam, 22 + ) 23 + 24 + from ..cache_utils import get_model_path 25 + from ..hf_downloader import pull_model 4 26 from ..schemas import ( 27 + ChatCompletionRequest, 5 28 ChatMessage, 6 - ChatCompletionRequest, 29 + GenerationMetrics, 30 + ResponsesRequest, 7 31 ResponsesResponse, 8 32 downloadRequest, 9 - GenerationMetrics, 10 - ResponsesRequest, 11 33 ) 12 - from ..hf_downloader import pull_model 13 - 14 - import logging 15 - import json 16 - import time 17 - import uuid 18 - from collections.abc import AsyncGenerator 34 + from .mlx_runner import MLXRunner 19 35 20 36 logger = logging.getLogger("app") 21 37 ··· 36 52 raise HTTPException(status_code=400, detail="Downloading model failed") 37 53 38 54 39 - def get_or_load_model(model_spec: str, verbose: bool = False) -> MLXRunner: 55 + def get_or_load_model(model_spec: str, verbose: bool = True) -> MLXRunner: 40 56 """Get model from cache or load it if not cached.""" 41 57 global _model_cache, _current_model_path 42 58 ··· 204 220 205 221 def _prepend_previous_response(user_input: str, prev_id: Optional[str]) -> str: 206 222 """If prev_id points to a stored response, prepend its output text as context.""" 223 + 207 224 if not prev_id: 208 225 return user_input 209 - prev = _responses.get(prev_id) 226 + 227 + prev_id = json.loads(prev_id) 228 + 229 + prev = _responses.get(prev_id) # pyright: ignore 230 + 210 231 if not prev or not getattr(prev, 
"output", None): 211 232 return user_input 212 233 prev_text_parts: List[str] = [] ··· 219 240 return user_input 220 241 221 242 222 - def _calc_usage(runner: MLXRunner, input_text: str, generated_text: str) -> Dict[str, int]: 243 + def _calc_usage( 244 + runner: MLXRunner, input_text: str, generated_text: str 245 + ) -> Dict[str, int]: 223 246 """Calculate token usage using the runner tokenizer; fall back to zeros on error.""" 224 247 try: 225 248 input_tokens = len(runner.tokenizer.encode(input_text)) ··· 237 260 status: str, 238 261 output: List[Dict[str, Any]], 239 262 usage: Dict[str, int], 263 + error: Error | None = None, 264 + incomplete_details: IncompleteDetails | None = None, 240 265 metrics: Optional[Dict[str, Any]] = None, 241 - error: Optional[Dict[str, Any]] = None, 242 266 ) -> ResponsesResponse: 243 267 """Create a ResponsesResponse, attach metrics to metadata and store it in `_responses`.""" 244 268 resp = ResponsesResponse( ··· 251 275 error=error, 252 276 output=output, 253 277 usage=usage, 278 + incomplete_details=incomplete_details, 254 279 ) 255 280 if metrics: 256 281 try: ··· 269 294 return int(len(text.split()) * 1.3) # Approximation, convert to int 270 295 271 296 297 + def handle_response_input(request: ResponsesRequest): 298 + dev_msg_item = None 299 + user_msg_item = None 300 + user_input_content = "" 301 + if isinstance(request.input, str): 302 + user_input_content = request.input 303 + else: 304 + for item in request.input: 305 + match item: 306 + case UserMessageItemParam(): 307 + user_msg_item = item 308 + user_input_content = item.content.root # pyright: ignore 309 + case DeveloperMessageItemParam(): 310 + dev_msg_item = item 311 + case _: 312 + raise TypeError("unknown type") 313 + return [user_input_content, user_msg_item, dev_msg_item] 314 + 315 + 272 316 async def generate_response_chat_stream( 273 - request: ResponsesRequest 317 + request: ResponsesRequest, 274 318 ) -> AsyncGenerator[str, None]: 275 319 """Generate streaming 
chat responses for Responses API.""" 276 - 277 - model = request.model or "mlx-community/gpt-oss-20b-MXFP4-Q4" 278 - user_input = request.input or "" 279 - response_id = f"resp-{uuid.uuid4()}" 280 - msg_id = f"msg_{uuid.uuid4()}" 320 + model = request.model 281 321 created = int(time.time()) 282 322 runner = get_or_load_model(model) 283 323 metrics = None 284 - # If a previous_response_id is provided, prepend its text to the prompt 285 - prev_id = getattr(request, "previous_response_id", None) 286 - user_input = _prepend_previous_response(user_input, prev_id) 287 324 288 - # Calculate input tokens once 289 - input_tokens = len(runner.tokenizer.encode(user_input)) 325 + user_input_content = "" 326 + 327 + dev_msg_item = None 328 + user_msg_item = None 329 + [user_input_content, user_msg_item, dev_msg_item] = handle_response_input(request) 330 + user_input_content = _prepend_previous_response( 331 + user_input_content, request.previous_response_id 332 + ) 333 + 334 + reasoning_effort = get_reasoning_effort(request.reasoning.effort) 335 + 336 + convo = build_harmony_conversation( 337 + reasoning_effort, dev_msg_item, user_input_content 338 + ) 339 + 340 + input_tokens = len(runner.tokenizer.encode(user_input_content)) # pyright: ignore 290 341 291 342 # Initial chunk 292 343 initial_chunk = { 293 - "id": response_id, 344 + "id": f"resp_{uuid.uuid4()}", 294 345 "object": "response.chunk", 295 346 "created_at": created, 296 347 "model": model, ··· 298 349 "output": [ 299 350 { 300 351 "type": "message", 301 - "id": msg_id, 352 + "id": f"msg_{uuid.uuid4()}", 302 353 "status": "in_progress", 303 354 "role": "assistant", 304 355 "content": [], ··· 307 358 "usage": {"input_tokens": input_tokens, "output_tokens": 0}, 308 359 } 309 360 yield f"data: {json.dumps(initial_chunk)}\n\n" 310 - 311 - # Stream tokens 361 + 312 362 accumulated_text = "" 363 + answer_text = "" 313 364 output_tokens = 0 365 + error = None 366 + incomplete_details = None 367 + has_answer_started: bool = 
False 368 + # TODO: we need to inject the context prepending, else model is losing it. 314 369 try: 315 - for token in runner.generate_streaming( 316 - prompt=user_input, 370 + for token in runner.generate_streaming_gpt( 371 + conversation=convo, 317 372 max_tokens=runner.get_effective_max_tokens(request.max_output_tokens), 318 373 temperature=request.temperature or 1, 319 374 top_p=request.top_p or 1, 320 - use_chat_template=True, 321 375 ): 322 376 if isinstance(token, GenerationMetrics): 323 377 metrics = token 324 378 continue 325 - 379 + 380 + if not isinstance(token, str): 381 + continue 382 + 383 + if "**[Answer]**" in token or has_answer_started: 384 + has_answer_started = True 385 + answer_text += token 386 + 326 387 accumulated_text += token 327 388 output_tokens += 1 # Each yield is one token 328 - 389 + 329 390 chunk = { 330 - "id": response_id, 391 + "id": f"resp_{uuid.uuid4()}", 331 392 "object": "response.chunk", 332 393 "created_at": created, 333 394 "model": model, ··· 335 396 "output": [ 336 397 { 337 398 "type": "message", 338 - "id": msg_id, 399 + "id": f"msg_{uuid.uuid4()}", 339 400 "status": "in_progress", 340 401 "role": "assistant", 341 402 "content": [ ··· 350 411 "usage": {"input_tokens": input_tokens, "output_tokens": output_tokens}, 351 412 } 352 413 yield f"data: {json.dumps(chunk)}\n\n" 353 - 414 + 354 415 except Exception as e: 416 + error = {"message": str(e), "code": "500"} 417 + incomplete_details = {"reason": "internal server error"} 418 + 355 419 error_chunk = { 356 - "id": response_id, 420 + "id": f"resp_{uuid.uuid4()}", 357 421 "object": "response.chunk", 358 422 "created_at": created, 359 423 "model": model, 360 424 "status": "failed", 361 - "error": {"message": str(e), "type": "internal_error"}, 425 + "error": error, 426 + "incomplete_details": incomplete_details, 362 427 "output": [], 363 428 "usage": {"input_tokens": input_tokens, "output_tokens": output_tokens}, 364 429 } 365 430 yield f"data: 
{json.dumps(error_chunk)}\n\n" 366 431 return 367 - 432 + 368 433 # Final chunk 369 434 completed_at = int(time.time()) 370 435 # Build final chunk with accumulated text and store response for follow-ups 436 + 371 437 final_chunk = { 372 - "id": response_id, 438 + "id": f"resp_{uuid.uuid4()}", 373 439 "object": "response.chunk", 374 440 "created_at": created, 375 441 "completed_at": completed_at, ··· 378 444 "output": [ 379 445 { 380 446 "type": "message", 381 - "id": msg_id, 447 + "id": f"msg_{uuid.uuid4()}", 382 448 "status": "completed", 383 449 "role": "assistant", 384 450 "content": [ 385 451 { 386 452 "type": "output_text", 387 - "text": "", 453 + "text": answer_text, 388 454 "annotations": [], 389 455 } 390 456 ], ··· 392 458 ], 393 459 "usage": {"input_tokens": input_tokens, "output_tokens": output_tokens}, 394 460 } 461 + 395 462 # Store and return a typed ResponsesResponse for follow-ups 396 463 metrics_obj = None 397 464 if metrics: ··· 404 471 final_chunk["metrics"] = metrics_obj 405 472 406 473 _store_response( 407 - response_id=response_id, 474 + response_id=final_chunk["id"], 408 475 created=created, 409 476 completed_at=completed_at, 410 477 model=model, ··· 418 485 419 486 420 487 async def generate_response_chat(request: ResponsesRequest): 421 - """Generate chat responses""" 488 + """Generate chat responses for Responses API""" 422 489 423 - model = request.model or "mlx-community/gpt-oss-20b-MXFP4-Q4" 424 - user_input = request.input or "" 490 + model = request.model 425 491 response_id = f"resp-{uuid.uuid4()}" 426 492 msg_id = f"msg_{uuid.uuid4()}" 427 493 created = int(time.time()) 428 494 runner = get_or_load_model(model) 429 495 430 - # If a previous_response_id is provided, prepend its text to the prompt 431 - prev_id = getattr(request, "previous_response_id", None) 432 - user_input = _prepend_previous_response(user_input, prev_id) 496 + user_input_content = "" 497 + 498 + dev_msg_item = None 499 + user_msg_item = None 500 + 
[user_input_content, user_msg_item, dev_msg_item] = handle_response_input(request) 501 + user_input_content = _prepend_previous_response( 502 + user_input_content, request.previous_response_id 503 + ) 504 + 505 + reasoning_effort = get_reasoning_effort(request.reasoning.effort) 506 + 507 + convo = build_harmony_conversation( 508 + reasoning_effort, dev_msg_item, user_input_content 509 + ) 433 510 434 511 metrics_obj = None 512 + error = None 513 + incomplete_details = None 514 + 435 515 try: 436 516 start_time = time.time() 437 - generated_text = runner.generate_batch( 438 - prompt=user_input, 517 + generated_text = runner.generate_batch_gpt( 518 + conversation=convo, 439 519 max_tokens=runner.get_effective_max_tokens(request.max_output_tokens), 440 520 temperature=request.temperature or 1, 441 521 top_p=request.top_p or 1, ··· 448 528 completed_at = int(time.time()) 449 529 status = "completed" 450 530 error = None 451 - 531 + incomplete_details = None 452 532 # Calculate token usage 453 - usage = _calc_usage(runner, user_input, generated_text) 533 + usage = _calc_usage(runner, user_input_content, generated_text) 454 534 output_tokens = usage.get("output_tokens", 0) 455 535 metrics_obj = { 456 536 "ttft_ms": generation_time * 1000.0, 457 537 "total_tokens": output_tokens, 458 - "tokens_per_second": (output_tokens / generation_time) if generation_time > 0 else 0.0, 538 + "tokens_per_second": (output_tokens / generation_time) 539 + if generation_time > 0 540 + else 0.0, 459 541 "total_latency_s": generation_time, 460 542 } 461 543 462 544 except Exception as e: 463 545 completed_at = None 464 546 status = "failed" 465 - error = {"message": str(e), "type": "internal_error"} 547 + error = {"message": str(e), "code": "500"} 548 + incomplete_details = {"reason": "internal server error"} 466 549 generated_text = "" 467 550 usage = {"input_tokens": 0, "output_tokens": 0} 468 551 469 - output_block = [ 470 - { 471 - "type": "message", 472 - "id": msg_id, 473 - "status": 
"completed" if status == "completed" else "failed", 474 - "role": "assistant", 475 - "content": [ 476 - {"type": "output_text", "text": generated_text, "annotations": []} 477 - ], 478 - } 479 - ] if status == "completed" else [] 552 + output_block = ( 553 + [ 554 + { 555 + "type": "message", 556 + "id": msg_id, 557 + "status": "completed" if status == "completed" else "failed", 558 + "role": "assistant", 559 + "content": [ 560 + {"type": "output_text", "text": generated_text, "annotations": []} 561 + ], 562 + } 563 + ] 564 + if status == "completed" 565 + else [] 566 + ) 480 567 481 568 resp = _store_response( 482 569 response_id=response_id, ··· 486 573 status=status, 487 574 output=output_block, 488 575 usage=usage, 489 - metrics=(metrics_obj if status == "completed" else None), 490 576 error=error, 577 + incomplete_details=incomplete_details, 578 + metrics=(metrics_obj if status == "completed" else None), 491 579 ) 492 580 493 581 return resp 582 + 583 + 584 + def get_reasoning_effort(reasoning_effort_enum: ReasoningEffortEnum | None): 585 + reasoning_effort: ReasoningEffort 586 + match reasoning_effort_enum: 587 + case ReasoningEffortEnum.high: 588 + reasoning_effort = ReasoningEffort.HIGH 589 + case ReasoningEffortEnum.medium: 590 + reasoning_effort = ReasoningEffort.MEDIUM 591 + case ReasoningEffortEnum.low: 592 + reasoning_effort = ReasoningEffort.LOW 593 + case ReasoningEffortEnum.xhigh: 594 + reasoning_effort = ReasoningEffort.HIGH 595 + case _: 596 + raise TypeError("unknow reasoing effort") 597 + return reasoning_effort 598 + 599 + 600 + def build_harmony_conversation( 601 + reasoning_effort: ReasoningEffort, 602 + dev_msg_item: DeveloperMessageItemParam | None, 603 + user_input: str, 604 + ): 605 + system_message = SystemContent.new().with_reasoning_effort(reasoning_effort) 606 + dev_message: DeveloperContent = DeveloperContent.new() 607 + if isinstance(dev_msg_item, DeveloperMessageItemParam): 608 + dev_message = 
DeveloperContent.new().with_instructions( 609 + dev_msg_item.content.root 610 + ) # pyright: ignore 611 + 612 + convo = Conversation.from_messages( 613 + [ 614 + Message.from_role_and_content(Role.SYSTEM, system_message), 615 + Message.from_role_and_content(Role.DEVELOPER, dev_message), 616 + Message.from_role_and_content(Role.USER, user_input), 617 + ] 618 + ) 619 + return convo
+205 -3
server/backend/mlx_runner.py
··· 7 7 import os 8 8 import sys 9 9 import time 10 + from ast import Yield 10 11 from collections.abc import Iterator 11 12 from pathlib import Path 12 13 from typing import Dict, Optional 14 + 15 + from mlx_lm.tokenizer_utils import TokenizerWrapper 13 16 14 17 if sys.platform == "darwin": 15 18 import mlx.core as mx ··· 18 21 from mlx_lm import load 19 22 from mlx_lm.generate import generate_step 20 23 from mlx_lm.sample_utils import make_repetition_penalty, make_sampler 24 + from openai_harmony import ( 25 + Conversation, 26 + HarmonyEncodingName, 27 + Message, 28 + Role, 29 + StreamableParser, 30 + SystemContent, 31 + load_harmony_encoding, 32 + ) 21 33 22 34 from ..reasoning_utils import ReasoningExtractor, StreamingReasoningParser 23 35 from ..schemas import GenerationMetrics ··· 62 74 class MLXRunner: 63 75 """Direct MLX model runner with streaming and interactive capabilities.""" 64 76 77 + model_path: Path 78 + adapter_path: str | None 79 + model: object | None 80 + tokenizer: TokenizerWrapper | None 81 + _memory_baseline: float | None 82 + _stop_tokens: list[str] | None 83 + _message_end_tokens: list[str] | None 84 + _chat_stop_tokens: list[str] | None 85 + _context_length: int | None 86 + _is_reasoning_model: bool 87 + _reasoning_start: str | None 88 + _reasoning_end: str | None 89 + _final_start: str | None 90 + verbose: bool 91 + _model_loaded: bool 92 + _context_entered: bool 93 + 65 94 def __init__( 66 - self, model_path: str, adapter_path: Optional[str] = None, verbose: bool = False 95 + self, model_path: str, adapter_path: str | None = None, verbose: bool = False 67 96 ): 68 97 """Initialize the runner with a model. 
69 98 ··· 113 142 return False # Don't suppress exceptions 114 143 115 144 def load_model(self): 145 + if mx is None: 146 + raise RuntimeError("MLX runtime not available in current runtime") 116 147 """Load the MLX model and tokenizer.""" 117 148 if self._model_loaded: 118 149 if self.verbose: ··· 132 163 133 164 try: 134 165 # Load model and tokenizer 135 - self.model, self.tokenizer = load( 166 + self.model, self.tokenizer, *_ = load( 136 167 str(self.model_path), adapter_path=self.adapter_path 137 168 ) 138 169 ··· 410 441 server_limit = self._context_length // 2 411 442 return min(requested_tokens or server_limit, server_limit) 412 443 444 + def generate_streaming_gpt( 445 + self, 446 + conversation: Conversation, 447 + max_tokens: int = 500, 448 + temperature: float = 0.7, 449 + top_p: float = 0.9, 450 + repetition_penalty: float = 1.1, 451 + repetition_context_size: int = 20, 452 + ) -> Iterator[str]: 453 + if not self.model or not self.tokenizer: 454 + raise RuntimeError("Model not loaded. 
Call load_model() first.") 455 + 456 + effective_max_tokens = self.get_effective_max_tokens(max_tokens, False) 457 + 458 + encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) 459 + effective_max_tokens = self.get_effective_max_tokens(max_tokens, False) 460 + 461 + prompt_tokens = encoding.render_conversation_for_completion( 462 + conversation, Role.ASSISTANT 463 + ) 464 + 465 + prompt_array = mx.array(prompt_tokens) # pyright: ignore 466 + 467 + start_time = time.time() 468 + tokens_generated = 0 469 + ttft = None 470 + 471 + sampler = make_sampler(temp=temperature, top_p=top_p) 472 + 473 + # Create repetition penalty processor if needed 474 + logits_processors = [] 475 + if repetition_penalty > 1.0: 476 + logits_processors.append( 477 + make_repetition_penalty(repetition_penalty, repetition_context_size) 478 + ) 479 + 480 + # Generate tokens one by one for streaming 481 + generator = generate_step( 482 + prompt=prompt_array, 483 + model=self.model, # pyright: ignore 484 + max_tokens=effective_max_tokens, 485 + sampler=sampler, 486 + logits_processors=logits_processors if logits_processors else None, 487 + ) 488 + 489 + parser = StreamableParser(encoding, Role.ASSISTANT) 490 + 491 + # Collect tokens and yield text 492 + generated_tokens = [] 493 + is_analysis = None 494 + is_final = None 495 + for token, _ in generator: 496 + token_id = token.item() if hasattr(token, "item") else token 497 + parser.process(token_id) # pyright: ignore 498 + 499 + generated_tokens.append(token_id) 500 + 501 + if is_analysis is None and parser.current_channel == "analysis": 502 + is_analysis = True 503 + yield "**[Reasoning]**\n\n" 504 + 505 + if is_final is None and parser.current_channel == "final": 506 + is_final = True 507 + yield "\n\n---\n\n**[Answer]**\n\n" 508 + 509 + if ttft is None: 510 + ttft = time.time() - start_time 511 + 512 + yield parser.last_content_delta # pyright: ignore 513 + 514 + tokens_generated += 1 515 + 516 + # Check for EOS token - don't 
yield it 517 + 518 + if token_id == self.tokenizer.eos_token_id: 519 + break 520 + 521 + # Yield metrics at the end 522 + total_latency = time.time() - start_time 523 + tokens_per_second = tokens_generated / total_latency if total_latency > 0 else 0 524 + ttft_ms = (ttft * 1000) if ttft is not None else 0 525 + metrics = GenerationMetrics( 526 + ttft_ms=ttft_ms, 527 + total_tokens=tokens_generated, 528 + tokens_per_second=tokens_per_second, 529 + total_latency_s=total_latency, 530 + ) 531 + yield metrics 532 + 533 + # Print generation statistics if verbose 534 + if self.verbose: 535 + generation_time = time.time() - start_time 536 + tokens_per_second = ( 537 + tokens_generated / generation_time if generation_time > 0 else 0 538 + ) 539 + print( 540 + f"\n\nGenerated {tokens_generated} tokens in {generation_time:.1f}s ({tokens_per_second:.1f} tokens/s)" 541 + ) 542 + 413 543 def generate_streaming( 414 544 self, 415 545 prompt: str, ··· 670 800 f"\n\nGenerated {tokens_generated} tokens in {generation_time:.1f}s ({tokens_per_second:.1f} tokens/s)" 671 801 ) 672 802 803 + def generate_batch_gpt( 804 + self, 805 + conversation: Conversation, 806 + max_tokens: int = 500, 807 + temperature: float = 0.7, 808 + top_p: float = 0.9, 809 + repetition_penalty: float = 1.0, 810 + repetition_context_size: int = 20, 811 + use_chat_template: bool = True, 812 + interactive: bool = False, 813 + ) -> str: 814 + """ 815 + Generate text in batch mode (non-streaming) but for 816 + """ 817 + 818 + if not self.model or not self.tokenizer: 819 + raise RuntimeError("Model not loaded. 
Call load_model() first.") 820 + 821 + # lets do stuff for harmoy 822 + encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) 823 + effective_max_tokens = self.get_effective_max_tokens(max_tokens, interactive) 824 + 825 + prompt_tokens = encoding.render_conversation_for_completion( 826 + conversation, Role.ASSISTANT 827 + ) 828 + 829 + prompt_array = mx.array(prompt_tokens) 830 + sampler = make_sampler(temp=temperature, top_p=top_p) 831 + logits_processors = [] 832 + 833 + # TODO: Maybe add repetition penalty 834 + generator = generate_step( 835 + prompt=prompt_array, 836 + model=self.model, 837 + max_tokens=effective_max_tokens, 838 + sampler=sampler, 839 + logits_processors=logits_processors if logits_processors else None, 840 + ) 841 + 842 + generated_tokens = [] 843 + all_tokens = [] 844 + 845 + for token, _ in generator: 846 + # Token might be an array or an int 847 + token_id = token.item() if hasattr(token, "item") else token 848 + generated_tokens.append(token_id) 849 + all_tokens.append(token_id) 850 + 851 + # Check for EOS token - don't yield it 852 + if token_id == self.tokenizer.eos_token_id: 853 + break 854 + 855 + response = encoding.parse_messages_from_completion_tokens( 856 + generated_tokens, Role.ASSISTANT 857 + ) 858 + 859 + reasoning_texts = [ 860 + msg.content[0].text for msg in response if msg.channel == "analysis" 861 + ] 862 + final_texts = [ 863 + msg.content[0].text for msg in response if msg.channel != "analysis" 864 + ] 865 + 866 + # Concatenate the lists and turn into a single string. 
867 + all_texts = reasoning_texts + final_texts 868 + combined_text = "\n\n".join(filter(None, all_texts)) 869 + 870 + # if they are 2 different fields, then 871 + 872 + return combined_text 873 + 673 874 def generate_batch( 674 875 self, 675 876 prompt: str, ··· 712 913 formatted_prompt = self.tokenizer.apply_chat_template( 713 914 messages, tokenize=False, add_generation_prompt=True 714 915 ) 916 + 715 917 else: 716 918 formatted_prompt = prompt 717 919 ··· 753 955 if token_id == self.tokenizer.eos_token_id: 754 956 break 755 957 958 + print(f"all tokens\n{all_tokens}") 756 959 # Decode all tokens together for proper spacing 757 960 full_response = self.tokenizer.decode(all_tokens) 758 961 ··· 768 971 response, use_chat_stop_tokens=False 769 972 ) 770 973 771 - # Format reasoning models output 772 974 response = self._format_reasoning_response(response) 773 975 774 976 generation_time = time.time() - start_time
+2
server/pyproject.toml
··· 9 9 "mlx-lm", 10 10 "black", 11 11 "huggingface-hub>=0.34.0", 12 + "openai-harmony==0.0.8", 13 + "openresponses-types" 12 14 ] 13 15 14 16 [build-system]
+2 -2
server/pyrightconfig.json
··· 1 1 { 2 2 "venvPath": ".", 3 - "venv": ".venv" 3 + "venv": ".venv", 4 + "typeCheckingMode": "basic", 4 5 } 5 -
+80 -33
server/schemas.py
··· 1 + from dataclasses import dataclass 2 + from enum import Enum, auto 3 + from typing import Any, Dict, List, Union, override 4 + 5 + from openresponses_types import ReasoningParam, TruncationEnum 6 + from openresponses_types.types import ( 7 + AssistantMessageItemParam, 8 + DeveloperMessageItemParam, 9 + Error, 10 + FunctionCallItemParam, 11 + FunctionCallOutputItemParam, 12 + FunctionToolParam, 13 + IncompleteDetails, 14 + ItemReferenceParam, 15 + ReasoningEffortEnum, 16 + ReasoningItemParam, 17 + StreamOptionsParam, 18 + SystemMessageItemParam, 19 + ToolChoiceParam, 20 + UserMessageItemParam, 21 + ) 1 22 from pydantic import BaseModel, Field 2 - from typing import Any, Dict, List, Optional, Union 3 - from dataclasses import dataclass 4 23 5 24 6 25 class CompletionRequest(BaseModel): 7 26 model: str 8 27 prompt: Union[str, List[str]] 9 - max_tokens: Optional[int] = None 10 - temperature: Optional[float] = 0.7 11 - top_p: Optional[float] = 0.9 12 - stream: Optional[bool] = False 13 - stop: Optional[Union[str, List[str]]] = None 14 - repetition_penalty: Optional[float] = 1.1 28 + max_tokens: int | None = None 29 + temperature: float | None = 0.7 30 + top_p: float | None = 0.9 31 + stream: bool | None = False 32 + stop: Union[str, List[str]] | None = None 33 + repetition_penalty: float | None = 1.1 15 34 16 35 17 36 class ChatMessage(BaseModel): ··· 24 43 messages: List[ChatMessage] 25 44 chat_start: bool 26 45 python_code: str 27 - max_tokens: Optional[int] = None 28 - temperature: Optional[float] = 0.7 29 - top_p: Optional[float] = 0.9 30 - stream: Optional[bool] = False 31 - stop: Optional[Union[str, List[str]]] = None 32 - repetition_penalty: Optional[float] = 1.1 46 + max_tokens: int | None = None 47 + temperature: float | None = 0.7 48 + top_p: float | None = 0.9 49 + stream: bool | None = False 50 + stop: Union[str, List[str]] | None = None 51 + repetition_penalty: float | None = 1.1 33 52 34 53 35 54 class CompletionResponse(BaseModel): ··· 55 74 
object: str = "model" 56 75 owned_by: str = "mlx-knife" 57 76 permission: List = [] 58 - context_length: Optional[int] = None 77 + context_length: int | None = None 59 78 60 79 61 80 class StartRequest(BaseModel): ··· 69 88 70 89 71 90 class ResponsesRequest(BaseModel): 72 - model: Optional[str] = None 73 - input: Optional[str] = None 74 - reasoning: Optional[Dict[str, Any]] = None 75 - previous_response_id: Optional[str] = None 76 - stream: Optional[bool] = False 77 - tools: Optional[List[Dict[str, Any]]] = None 78 - temperature: Optional[float] = 1 79 - top_p: Optional[float] = 1 80 - max_output_tokens: Optional[int] = None 91 + model: str = "mlx-community/gpt-oss-20b-MXFP4-Q4" 92 + input: ( 93 + str 94 + | list[ 95 + ItemReferenceParam 96 + | ReasoningItemParam 97 + | UserMessageItemParam 98 + | SystemMessageItemParam 99 + | DeveloperMessageItemParam 100 + | AssistantMessageItemParam 101 + | FunctionCallItemParam 102 + | FunctionCallOutputItemParam 103 + ] 104 + ) 105 + reasoning: ReasoningParam = ReasoningParam( 106 + effort=ReasoningEffortEnum.medium, summary=None 107 + ) 108 + previous_response_id: str | None = None 109 + stream: bool | None = False 110 + stream_options: StreamOptionsParam | None = None 111 + tools: list[FunctionToolParam] | None = None 112 + tool_choice: ToolChoiceParam | None = None 113 + temperature: float | None = 1 114 + top_p: float | None = 1 115 + max_output_tokens: int | None = None 116 + store: bool = False 117 + # other service tiers are default, flex, priority 118 + service_tier: str = "auto" 119 + top_logprobs: int = 0 120 + # can put in the Developer msg if none there 121 + instructions: str | None = None 122 + # auto/disabled, returns 400 on disabled 123 + truncation: TruncationEnum = TruncationEnum.disabled 124 + prompt_cache: str | None = None 125 + safety_identifier: str | None = None 126 + max_tool_calls: int | None = None 127 + background: bool = False 81 128 82 129 83 130 class ResponsesResponse(BaseModel): ··· 85 132 
object: str = "response" 86 133 created_at: int 87 134 status: str 88 - completed_at: Optional[int] = None 89 - error: Optional[Dict[str, Any]] = None 90 - incomplete_details: Optional[Dict[str, Any]] = None 91 - instructions: Optional[str] = None 92 - max_output_tokens: Optional[int] = None 135 + completed_at: int | None = None 136 + error: Error | None = None 137 + incomplete_details: IncompleteDetails | None = None 138 + instructions: str | None = None 139 + max_output_tokens: int | None = None 93 140 model: str 94 - output: List[Dict[str, Any]] 141 + output: list[Dict[str, Any]] 95 142 parallel_tool_calls: bool = True 96 - previous_response_id: Optional[str] = None 97 - reasoning: Optional[Dict[str, Any]] = Field(default_factory=dict) 143 + previous_response_id: str = "" 144 + reasoning: Dict[str, Any] | None = Field(default_factory=dict) 98 145 store: bool = True 99 146 temperature: float = 1.0 100 147 text: Dict[str, Any] = Field(default_factory=lambda: {"format": {"type": "text"}}) ··· 103 150 top_p: float = 1.0 104 151 truncation: str = "disabled" 105 152 usage: Dict[str, Any] 106 - user: Optional[str] = None 153 + user: str | None = None 107 154 metadata: Dict[str, Any] = Field(default_factory=dict) 108 155 109 156
+6 -4
server/stack/requirements/app-server/packages-app-server.txt
··· 7 7 charset-normalizer==3.4.4 8 8 click==8.3.1 9 9 fastapi==0.119.0 10 - filelock==3.24.3 10 + filelock==3.25.0 11 11 fsspec==2026.2.0 12 12 h11==0.16.0 13 - hf-xet==1.3.1 13 + hf-xet==1.3.2 14 14 huggingface-hub==0.35.0 15 15 idna==3.11 16 16 jinja2==3.1.6 ··· 18 18 mlx-lm==0.28.3 19 19 mypy-extensions==1.1.0 20 20 numpy==2.4.2 21 + openai-harmony==0.0.8 22 + openresponses-types==2.3.0.post1 21 23 packaging==26.0 22 24 pathspec==1.0.4 23 25 platformdirs==4.9.2 24 - protobuf==6.33.5 26 + protobuf==7.34.0 25 27 pydantic==2.12.5 26 28 pydantic-core==2.41.5 27 29 pytokens==0.4.1 28 30 pyyaml==6.0.3 29 - regex==2026.2.19 31 + regex==2026.2.28 30 32 requests==2.32.5 31 33 safetensors==0.7.0 32 34 starlette==0.48.0
+3 -3
server/stack/requirements/app-server/pylock.app-server.meta.json
··· 1 1 { 2 - "lock_input_hash": "sha256:182c606e20dd957344cc3adc54391f47f4b6dd80b4481ddf219392a7aad6e0ce", 2 + "lock_input_hash": "sha256:c836d5cfb697330a57241b2b8f275a804178488ec906b19866809ef33c95ba81", 3 3 "lock_version": 1, 4 - "locked_at": "2026-02-25T13:24:58.188888+00:00", 4 + "locked_at": "2026-03-01T18:20:47.939345+00:00", 5 5 "other_inputs_hash": "sha256:63b3c2cfe2ec414938e81dace7aac779c7b902bae681618cd8827e9f16880985", 6 - "requirements_hash": "sha256:dc0d11b6a0897aff3ae64d3bda37f52b66dd75932f713491457eeea1b68c3fde", 6 + "requirements_hash": "sha256:bc1a0df3c15a1fad6f446be81a50a01835c1a11ff412440842f751ecdce9cbdd", 7 7 "version_inputs_hash": "sha256:58db986b7cd72eeded675f7c9afd8138fe024fb51451131b5562922bbde3cf43" 8 8 }
+118 -76
server/stack/requirements/app-server/pylock.app-server.toml
··· 138 138 139 139 [[packages]] 140 140 name = "filelock" 141 - version = "3.24.3" 141 + version = "3.25.0" 142 142 index = "https://pypi.org/simple" 143 143 144 144 [[packages.wheels]] 145 - url = "https://files.pythonhosted.org/packages/9c/0f/5d0c71a1aefeb08efff26272149e07ab922b64f46c63363756224bd6872e/filelock-3.24.3-py3-none-any.whl" 146 - upload-time = 2026-02-19T00:48:18Z 147 - size = 24331 145 + url = "https://files.pythonhosted.org/packages/f9/0b/de6f54d4a8bedfe8645c41497f3c18d749f0bd3218170c667bf4b81d0cdd/filelock-3.25.0-py3-none-any.whl" 146 + upload-time = 2026-03-01T15:08:44Z 147 + size = 26427 148 148 149 149 [packages.wheels.hashes] 150 - sha256 = "426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d" 150 + sha256 = "5ccf8069f7948f494968fc0713c10e5c182a9c9d9eef3a636307a20c2490f047" 151 151 152 152 [[packages]] 153 153 name = "fsspec" ··· 177 177 178 178 [[packages]] 179 179 name = "hf-xet" 180 - version = "1.3.1" 180 + version = "1.3.2" 181 181 index = "https://pypi.org/simple" 182 182 183 183 [[packages.wheels]] 184 - url = "https://files.pythonhosted.org/packages/d4/de/72acb8d7702b3cf9b36a68e8380f3114bf04f9f21cf9e25317457fe31f00/hf_xet-1.3.1-cp313-cp313t-macosx_11_0_arm64.whl" 185 - upload-time = 2026-02-25T00:57:39Z 186 - size = 3518075 184 + url = "https://files.pythonhosted.org/packages/35/56/987b0537ddaf88e17192ea09afa8eca853e55f39a4721578be436f8409df/hf_xet-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl" 185 + upload-time = 2026-02-27T17:25:47Z 186 + size = 3521565 187 187 188 188 [packages.wheels.hashes] 189 - sha256 = "0810b69c64e96dee849036193848007f665dca2311879c9ea8693f4fc37f1795" 189 + sha256 = "c1ae4d3a716afc774e66922f3cac8206bfa707db13f6a7e62dfff74bfc95c9a8" 190 190 191 191 [[packages.wheels]] 192 - url = "https://files.pythonhosted.org/packages/1d/5c/ed728d8530fec28da88ee882b522fccf00dc98e9d7bae4cdb0493070cb17/hf_xet-1.3.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" 193 - upload-time = 2026-02-25T00:57:32Z 194 - 
size = 4174369 192 + url = "https://files.pythonhosted.org/packages/a8/5c/7e4a33a3d689f77761156cc34558047569e54af92e4d15a8f493229f6767/hf_xet-1.3.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" 193 + upload-time = 2026-02-27T17:25:40Z 194 + size = 4176494 195 195 196 196 [packages.wheels.hashes] 197 - sha256 = "ecd38f98e7f0f41108e30fd4a9a5553ec30cf726df7473dd3e75a1b6d56728c2" 197 + sha256 = "d6dbdf231efac0b9b39adcf12a07f0c030498f9212a18e8c50224d0e84ab803d" 198 198 199 199 [[packages.wheels]] 200 - url = "https://files.pythonhosted.org/packages/df/31/de07e26e396f46d13a09251df69df9444190e93e06a9d30d639e96c8a0ed/hf_xet-1.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl" 201 - upload-time = 2026-02-25T00:57:49Z 202 - size = 4390709 200 + url = "https://files.pythonhosted.org/packages/e2/e1/3af961f71a40e09bf5ee909842127b6b00f5ab4ee3817599dc0771b79893/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl" 201 + upload-time = 2026-02-27T17:25:58Z 202 + size = 4394161 203 203 204 204 [packages.wheels.hashes] 205 - sha256 = "b3012c0f2ce1f0863338491a2bc0fd3f84aded0e147ab25f230da1f5249547fd" 205 + sha256 = "35b855024ca37f2dd113ac1c08993e997fbe167b9d61f9ef66d3d4f84015e508" 206 206 207 207 [[packages.wheels]] 208 - url = "https://files.pythonhosted.org/packages/c0/e5/a2f3eaae09da57deceb16a96ebe9ae1f6f7b9b94145a9cd3c3f994e7782a/hf_xet-1.3.1-cp37-abi3-macosx_11_0_arm64.whl" 209 - upload-time = 2026-02-25T00:57:42Z 210 - size = 3523677 208 + url = "https://files.pythonhosted.org/packages/e4/71/b99aed3823c9d1795e4865cf437d651097356a3f38c7d5877e4ac544b8e4/hf_xet-1.3.2-cp37-abi3-macosx_11_0_arm64.whl" 209 + upload-time = 2026-02-27T17:25:50Z 210 + size = 3526171 211 211 212 212 [packages.wheels.hashes] 213 - sha256 = "329c80c86f2dda776bafd2e4813a46a3ee648dce3ac0c84625902c70d7a6ddba" 213 + sha256 = "a85d3d43743174393afe27835bde0cd146e652b5fcfdbcd624602daef2ef3259" 214 214 215 215 [[packages.wheels]] 216 - url = 
"https://files.pythonhosted.org/packages/61/cd/acbbf9e51f17d8cef2630e61741228e12d4050716619353efc1ac119f902/hf_xet-1.3.1-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" 217 - upload-time = 2026-02-25T00:57:35Z 218 - size = 4178557 216 + url = "https://files.pythonhosted.org/packages/9d/ca/907890ce6ef5598b5920514f255ed0a65f558f820515b18db75a51b2f878/hf_xet-1.3.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" 217 + upload-time = 2026-02-27T17:25:43Z 218 + size = 4180750 219 219 220 220 [packages.wheels.hashes] 221 - sha256 = "2973c3ff594c3a8da890836308cae1444c8af113c6f10fe6824575ddbc37eca7" 221 + sha256 = "7c2a054a97c44e136b1f7f5a78f12b3efffdf2eed3abc6746fc5ea4b39511633" 222 222 223 223 [[packages.wheels]] 224 - url = "https://files.pythonhosted.org/packages/08/9c/b667098a636a88358dbeb2caf90e3cb9e4b961f61f6c55bb312793424def/hf_xet-1.3.1-cp37-abi3-musllinux_1_2_x86_64.whl" 225 - upload-time = 2026-02-25T00:57:52Z 226 - size = 4395743 224 + url = "https://files.pythonhosted.org/packages/00/b3/7bc1ff91d1ac18420b7ad1e169b618b27c00001b96310a89f8a9294fe509/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_x86_64.whl" 225 + upload-time = 2026-02-27T17:26:03Z 226 + size = 4398020 227 227 228 228 [packages.wheels.hashes] 229 - sha256 = "e5063789c9d21f51e9ed4edbee8539655d3486e9cad37e96b7af967da20e8b16" 229 + sha256 = "06cdbde243c85f39a63b28e9034321399c507bcd5e7befdd17ed2ccc06dfe14e" 230 230 231 231 [[packages]] 232 232 name = "huggingface-hub" ··· 426 426 sha256 = "0f01dcf33e73d80bd8dc0f20a71303abbafa26a19e23f6b68d1aa9990af90257" 427 427 428 428 [[packages]] 429 + name = "openai-harmony" 430 + version = "0.0.8" 431 + index = "https://pypi.org/simple" 432 + 433 + [[packages.wheels]] 434 + url = "https://files.pythonhosted.org/packages/45/c6/2502f416d46be3ec08bb66d696cccffb57781a499e3ff2e4d7c174af4e8f/openai_harmony-0.0.8-cp38-abi3-macosx_11_0_arm64.whl" 435 + upload-time = 2025-11-05T19:06:57Z 436 + size = 2627806 437 + 438 + [packages.wheels.hashes] 439 + sha256 = 
"029ec25ca74abe48fdb58eb9fdd2a8c1618581fc33ce8e5653f8a1ffbfbd9326" 440 + 441 + [[packages.wheels]] 442 + url = "https://files.pythonhosted.org/packages/25/3f/1a192b93bb47c6b44cd98ba8cc1d3d2a9308f1bb700c3017e6352da11bda/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" 443 + upload-time = 2025-11-05T19:06:55Z 444 + size = 2953260 445 + 446 + [packages.wheels.hashes] 447 + sha256 = "c007d277218a50db8839e599ed78e0fffe5130f614c3f6d93ae257f282071a29" 448 + 449 + [[packages.wheels]] 450 + url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl" 451 + upload-time = 2025-11-05T19:07:05Z 452 + size = 3205080 453 + 454 + [packages.wheels.hashes] 455 + sha256 = "007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5" 456 + 457 + [[packages]] 458 + name = "openresponses-types" 459 + version = "2.3.0.post1" 460 + index = "https://pypi.org/simple" 461 + 462 + [[packages.wheels]] 463 + url = "https://files.pythonhosted.org/packages/b2/5f/e16dad89ed24f586da5b01b9b206d3adbf21fe1af8e4dc55d5b93158fde6/openresponses_types-2.3.0.post1-py3-none-any.whl" 464 + upload-time = 2026-01-22T20:02:02Z 465 + size = 13847 466 + 467 + [packages.wheels.hashes] 468 + sha256 = "88f6abcef9cad839203abff420dd080978bf6eb33cc06ddc5d78da4ccdba7613" 469 + 470 + [[packages]] 429 471 name = "packaging" 430 472 version = "26.0" 431 473 index = "https://pypi.org/simple" ··· 466 508 467 509 [[packages]] 468 510 name = "protobuf" 469 - version = "6.33.5" 511 + version = "7.34.0" 470 512 index = "https://pypi.org/simple" 471 513 472 514 [[packages.wheels]] 473 - url = "https://files.pythonhosted.org/packages/a2/6b/e48dfc1191bc5b52950246275bf4089773e91cb5ba3592621723cdddca62/protobuf-6.33.5-cp39-abi3-macosx_10_9_universal2.whl" 474 - upload-time = 2026-01-29T21:51:25Z 475 - size = 427766 515 + url = 
"https://files.pythonhosted.org/packages/13/c4/6322ab5c8f279c4c358bc14eb8aefc0550b97222a39f04eb3c1af7a830fa/protobuf-7.34.0-cp310-abi3-macosx_10_9_universal2.whl" 516 + upload-time = 2026-02-27T00:30:14Z 517 + size = 429248 476 518 477 519 [packages.wheels.hashes] 478 - sha256 = "a5cb85982d95d906df1e2210e58f8e4f1e3cdc088e52c921a041f9c9a0386de5" 520 + sha256 = "8e329966799f2c271d5e05e236459fe1cbfdb8755aaa3b0914fa60947ddea408" 479 521 480 522 [[packages.wheels]] 481 - url = "https://files.pythonhosted.org/packages/9b/53/a9443aa3ca9ba8724fdfa02dd1887c1bcd8e89556b715cfbacca6b63dbec/protobuf-6.33.5-cp39-abi3-manylinux2014_x86_64.whl" 482 - upload-time = 2026-01-29T21:51:28Z 483 - size = 323465 523 + url = "https://files.pythonhosted.org/packages/b5/57/89727baef7578897af5ed166735ceb315819f1c184da8c3441271dbcfde7/protobuf-7.34.0-cp310-abi3-manylinux2014_x86_64.whl" 524 + upload-time = 2026-02-27T00:30:20Z 525 + size = 324268 484 526 485 527 [packages.wheels.hashes] 486 - sha256 = "cbf16ba3350fb7b889fca858fb215967792dc125b35c7976ca4818bee3521cf0" 528 + sha256 = "964cf977e07f479c0697964e83deda72bcbc75c3badab506fb061b352d991b01" 487 529 488 530 [[packages.wheels]] 489 - url = "https://files.pythonhosted.org/packages/57/bf/2086963c69bdac3d7cff1cc7ff79b8ce5ea0bec6797a017e1be338a46248/protobuf-6.33.5-py3-none-any.whl" 490 - upload-time = 2026-01-29T21:51:32Z 491 - size = 170687 531 + url = "https://files.pythonhosted.org/packages/a4/e7/14dc9366696dcb53a413449881743426ed289d687bcf3d5aee4726c32ebb/protobuf-7.34.0-py3-none-any.whl" 532 + upload-time = 2026-02-27T00:30:23Z 533 + size = 170716 492 534 493 535 [packages.wheels.hashes] 494 - sha256 = "69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02" 536 + sha256 = "e3b914dd77fa33fa06ab2baa97937746ab25695f389869afdf03e81f34e45dc7" 495 537 496 538 [[packages]] 497 539 name = "pydantic" ··· 603 645 604 646 [[packages]] 605 647 name = "regex" 606 - version = "2026.2.19" 648 + version = "2026.2.28" 607 649 index = 
"https://pypi.org/simple" 608 650 609 651 [[packages.wheels]] 610 - url = "https://files.pythonhosted.org/packages/d2/2d/a849835e76ac88fcf9e8784e642d3ea635d183c4112150ca91499d6703af/regex-2026.2.19-cp313-cp313-macosx_10_13_universal2.whl" 611 - upload-time = 2026-02-19T19:01:23Z 612 - size = 489329 652 + url = "https://files.pythonhosted.org/packages/87/f6/dc9ef48c61b79c8201585bf37fa70cd781977da86e466cd94e8e95d2443b/regex-2026.2.28-cp313-cp313-macosx_10_13_universal2.whl" 653 + upload-time = 2026-02-28T02:17:22Z 654 + size = 489311 613 655 614 656 [packages.wheels.hashes] 615 - sha256 = "8df08decd339e8b3f6a2eb5c05c687fe9d963ae91f352bc57beb05f5b2ac6879" 657 + sha256 = "6d63a07e5ec8ce7184452cb00c41c37b49e67dc4f73b2955b5b8e782ea970784" 616 658 617 659 [[packages.wheels]] 618 - url = "https://files.pythonhosted.org/packages/cd/58/714384efcc07ae6beba528a541f6e99188c5cc1bc0295337f4e8a868296d/regex-2026.2.19-cp313-cp313-macosx_11_0_arm64.whl" 619 - upload-time = 2026-02-19T19:01:27Z 620 - size = 289033 660 + url = "https://files.pythonhosted.org/packages/d2/a6/ba1068a631ebd71a230e7d8013fcd284b7c89c35f46f34a7da02082141b1/regex-2026.2.28-cp313-cp313-macosx_11_0_arm64.whl" 661 + upload-time = 2026-02-28T02:17:26Z 662 + size = 289051 621 663 622 664 [packages.wheels.hashes] 623 - sha256 = "c13228fbecb03eadbfd8f521732c5fda09ef761af02e920a3148e18ad0e09968" 665 + sha256 = "de0cf053139f96219ccfabb4a8dd2d217c8c82cb206c91d9f109f3f552d6b43d" 624 666 625 667 [[packages.wheels]] 626 - url = "https://files.pythonhosted.org/packages/8b/d9/e5dbef95008d84e9af1dc0faabbc34a7fbc8daa05bc5807c5cf86c2bec49/regex-2026.2.19-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" 627 - upload-time = 2026-02-19T19:01:34Z 628 - size = 803718 668 + url = "https://files.pythonhosted.org/packages/12/2f/049901def913954e640d199bbc6a7ca2902b6aeda0e5da9d17f114100ec2/regex-2026.2.28-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" 669 + upload-time 
= 2026-02-28T02:17:35Z 670 + size = 802101 629 671 630 672 [packages.wheels.hashes] 631 - sha256 = "9cbc69eae834afbf634f7c902fc72ff3e993f1c699156dd1af1adab5d06b7fe7" 673 + sha256 = "e61eea47230eba62a31f3e8a0e3164d0f37ef9f40529fb2c79361bc6b53d2a92" 632 674 633 675 [[packages.wheels]] 634 - url = "https://files.pythonhosted.org/packages/c3/45/ef68d805294b01ec030cfd388724ba76a5a21a67f32af05b17924520cb0b/regex-2026.2.19-cp313-cp313-musllinux_1_2_x86_64.whl" 635 - upload-time = 2026-02-19T19:01:47Z 636 - size = 790026 676 + url = "https://files.pythonhosted.org/packages/05/7a/51cfbad5758f8edae430cb21961a9c8d04bce1dae4d2d18d4186eec7cfa1/regex-2026.2.28-cp313-cp313-musllinux_1_2_x86_64.whl" 677 + upload-time = 2026-02-28T02:17:49Z 678 + size = 790152 637 679 638 680 [packages.wheels.hashes] 639 - sha256 = "790dbf87b0361606cb0d79b393c3e8f4436a14ee56568a7463014565d97da02a" 681 + sha256 = "9185cc63359862a6e80fe97f696e04b0ad9a11c4ac0a4a927f979f611bfe3768" 640 682 641 683 [[packages.wheels]] 642 - url = "https://files.pythonhosted.org/packages/a9/a2/e0b4575b93bc84db3b1fab24183e008691cd2db5c0ef14ed52681fbd94dd/regex-2026.2.19-cp313-cp313t-macosx_10_13_universal2.whl" 643 - upload-time = 2026-02-19T19:01:54Z 644 - size = 492202 684 + url = "https://files.pythonhosted.org/packages/24/07/6c7e4cec1e585959e96cbc24299d97e4437a81173217af54f1804994e911/regex-2026.2.28-cp313-cp313t-macosx_10_13_universal2.whl" 685 + upload-time = 2026-02-28T02:17:56Z 686 + size = 492541 645 687 646 688 [packages.wheels.hashes] 647 - sha256 = "93d881cab5afdc41a005dba1524a40947d6f7a525057aa64aaf16065cf62faa9" 689 + sha256 = "97054c55db06ab020342cc0d35d6f62a465fa7662871190175f1ad6c655c028f" 648 690 649 691 [[packages.wheels]] 650 - url = "https://files.pythonhosted.org/packages/70/0c/fe89966dfae43da46f475362401f03e4d7dc3a3c955b54f632abc52669e0/regex-2026.2.19-cp313-cp313t-macosx_11_0_arm64.whl" 651 - upload-time = 2026-02-19T19:01:59Z 652 - size = 291236 692 + url = 
"https://files.pythonhosted.org/packages/5b/11/c301f8cb29ce9644a5ef85104c59244e6e7e90994a0f458da4d39baa8e17/regex-2026.2.28-cp313-cp313t-macosx_11_0_arm64.whl" 693 + upload-time = 2026-02-28T02:18:00Z 694 + size = 291509 653 695 654 696 [packages.wheels.hashes] 655 - sha256 = "d793c5b4d2b4c668524cd1651404cfc798d40694c759aec997e196fe9729ec60" 697 + sha256 = "d6cfe798d8da41bb1862ed6e0cba14003d387c3c0c4a5d45591076ae9f0ce2f8" 656 698 657 699 [[packages.wheels]] 658 - url = "https://files.pythonhosted.org/packages/90/7c/981ea0694116793001496aaf9524e5c99e122ec3952d9e7f1878af3a6bf1/regex-2026.2.19-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" 659 - upload-time = 2026-02-19T19:02:08Z 660 - size = 812922 700 + url = "https://files.pythonhosted.org/packages/55/c2/fd429066da487ef555a9da73bf214894aec77fc8c66a261ee355a69871a8/regex-2026.2.28-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" 701 + upload-time = 2026-02-28T02:18:08Z 702 + size = 812044 661 703 662 704 [packages.wheels.hashes] 663 - sha256 = "1e7a08622f7d51d7a068f7e4052a38739c412a3e74f55817073d2e2418149619" 705 + sha256 = "5cdcc17d935c8f9d3f4db5c2ebe2640c332e3822ad5d23c2f8e0228e6947943a" 664 706 665 707 [[packages.wheels]] 666 - url = "https://files.pythonhosted.org/packages/59/78/9ef4356bd4aed752775bd18071034979b85f035fec51f3a4f9dea497a254/regex-2026.2.19-cp313-cp313t-musllinux_1_2_x86_64.whl" 667 - upload-time = 2026-02-19T19:02:20Z 668 - size = 799636 708 + url = "https://files.pythonhosted.org/packages/0a/50/414ba0731c4bd40b011fa4703b2cc86879ec060c64f2a906e65a56452589/regex-2026.2.28-cp313-cp313t-musllinux_1_2_x86_64.whl" 709 + upload-time = 2026-02-28T02:18:23Z 710 + size = 800184 669 711 670 712 [packages.wheels.hashes] 671 - sha256 = "c227f2922153ee42bbeb355fd6d009f8c81d9d7bdd666e2276ce41f53ed9a743" 713 + sha256 = "aaffaecffcd2479ce87aa1e74076c221700b7c804e48e98e62500ee748f0f550" 672 714 673 715 [[packages]] 674 716 name = "requests"
+2
server/stack/requirements/app-server/requirements-app-server.in
··· 5 5 mlx-lm==0.28.3 6 6 black==25.9.0 7 7 huggingface-hub==0.35.0 8 + openai-harmony==0.0.8 9 + openresponses-types
+2 -1
server/stack/venvstacks.toml
··· 27 27 "mlx-lm==0.28.3", 28 28 "black==25.9.0", 29 29 "huggingface-hub==0.35.0", 30 - 30 + "openai-harmony==0.0.8", 31 + "openresponses-types" 31 32 ] 32 33 33 34 [tool.uv]
+39
server/uv.lock
··· 306 306 ] 307 307 308 308 [[package]] 309 + name = "openai-harmony" 310 + version = "0.0.8" 311 + source = { registry = "https://pypi.org/simple" } 312 + dependencies = [ 313 + { name = "pydantic" }, 314 + ] 315 + sdist = { url = "https://files.pythonhosted.org/packages/3e/92/2d038d096f29179c7c9571b431f9e739f87a487121901725e23fe338dd9d/openai_harmony-0.0.8.tar.gz", hash = "sha256:6e43f98e6c242fa2de6f8ea12eab24af63fa2ed3e89c06341fb9d92632c5cbdf", size = 284777, upload-time = "2025-11-05T19:07:06.727Z" } 316 + wheels = [ 317 + { url = "https://files.pythonhosted.org/packages/45/c6/2502f416d46be3ec08bb66d696cccffb57781a499e3ff2e4d7c174af4e8f/openai_harmony-0.0.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:029ec25ca74abe48fdb58eb9fdd2a8c1618581fc33ce8e5653f8a1ffbfbd9326", size = 2627806, upload-time = "2025-11-05T19:06:57.063Z" }, 318 + { url = "https://files.pythonhosted.org/packages/d3/d2/ce6953ca87db9cae3e775024184da7d1c5cb88cead19a2d75b42f00a959c/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4f709815924ec325b9a890e6ab2bbb0ceec8e319a4e257328eb752cf36b2efc", size = 2948463, upload-time = "2025-11-05T19:06:48.17Z" }, 319 + { url = "https://files.pythonhosted.org/packages/fa/4c/b553c9651662d6ce102ca7f3629d268b23df1abe5841e24bed81e8a8e949/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5cfcfd963b50a41fc656c84d3440ca6eecdccd6c552158ce790b8f2e33dfb5a9", size = 2704083, upload-time = "2025-11-05T19:06:50.205Z" }, 320 + { url = "https://files.pythonhosted.org/packages/9b/af/4eec8f9ab9c27bcdb444460c72cf43011d176fc44c79d6e113094ca1e152/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a3a16972aa1cee38ea958470cd04ac9a2d5ac38fdcf77ab686611246220c158", size = 2959765, upload-time = "2025-11-05T19:06:53.62Z" }, 321 + { url = 
"https://files.pythonhosted.org/packages/11/3c/33f3374e4624e0e776f6b13b73c45a7ead7f9c4529f8369ed5bfcaa30cac/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b4d5cfa168e74d08f8ba6d58a7e49bc7daef4d58951ec69b66b0d56f4927a68d", size = 3427031, upload-time = "2025-11-05T19:06:51.829Z" }, 322 + { url = "https://files.pythonhosted.org/packages/25/3f/1a192b93bb47c6b44cd98ba8cc1d3d2a9308f1bb700c3017e6352da11bda/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c007d277218a50db8839e599ed78e0fffe5130f614c3f6d93ae257f282071a29", size = 2953260, upload-time = "2025-11-05T19:06:55.406Z" }, 323 + { url = "https://files.pythonhosted.org/packages/5b/f8/93b582cad3531797c3db7c2db5400fd841538ccddfd9f5e3df61be99a630/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8565d4f5a0638da1bffde29832ed63c9e695c558611053add3b2dc0b56c92dbc", size = 3127044, upload-time = "2025-11-05T19:06:59.553Z" }, 324 + { url = "https://files.pythonhosted.org/packages/1d/10/4327dbf87f75ae813405fd9a9b4a5cde63d506ffed0a096a440a4cabd89c/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:cbaa3bda75ef0d8836e1f8cc84af62f971b1d756d740efc95c38c3e04c0bfde2", size = 2932931, upload-time = "2025-11-05T19:07:01.437Z" }, 325 + { url = "https://files.pythonhosted.org/packages/8a/c8/1774eec4f6f360ef57618fb8f52e3d3af245b2491bd0297513aa09eec04b/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:772922a9bd24e133950fad71eb1550836f415a88e8c77870e12d0c3bd688ddc2", size = 2996140, upload-time = "2025-11-05T19:07:03.438Z" }, 326 + { url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5", size = 3205080, upload-time = "2025-11-05T19:07:05.078Z" }, 327 + { url = 
"https://files.pythonhosted.org/packages/14/63/119de431572d7c70a7bf1037034a9be6ed0a7502a7498ba7302bca5b3242/openai_harmony-0.0.8-cp38-abi3-win32.whl", hash = "sha256:a9b5f893326b28d9e935ade14b4f655f5a840942473bc89b201c25f7a15af9cf", size = 2082457, upload-time = "2025-11-05T19:07:09.631Z" }, 328 + { url = "https://files.pythonhosted.org/packages/40/1f/c83cf5a206c263ee70448a5ae4264682555f4d0b5bed0d2cc6ca1108103d/openai_harmony-0.0.8-cp38-abi3-win_amd64.whl", hash = "sha256:39d44f0d8f466bd56698e7ead708bead3141e27b9b87e3ab7d5a6d0e4a869ee5", size = 2438369, upload-time = "2025-11-05T19:07:08.1Z" }, 329 + ] 330 + 331 + [[package]] 332 + name = "openresponses-types" 333 + version = "2.3.0.post1" 334 + source = { registry = "https://pypi.org/simple" } 335 + dependencies = [ 336 + { name = "pydantic" }, 337 + ] 338 + sdist = { url = "https://files.pythonhosted.org/packages/d9/26/b612c3215f5599714fa94d63eb5ee59b4eb66dbdeeaf86bb4d848359484d/openresponses_types-2.3.0.post1.tar.gz", hash = "sha256:11b8896d3621d2ac2439f6ff106f34ddcb1bbd517c317a6c852a9df2e98a0753", size = 19254, upload-time = "2026-01-22T20:02:03.933Z" } 339 + wheels = [ 340 + { url = "https://files.pythonhosted.org/packages/b2/5f/e16dad89ed24f586da5b01b9b206d3adbf21fe1af8e4dc55d5b93158fde6/openresponses_types-2.3.0.post1-py3-none-any.whl", hash = "sha256:88f6abcef9cad839203abff420dd080978bf6eb33cc06ddc5d78da4ccdba7613", size = 13847, upload-time = "2026-01-22T20:02:02.582Z" }, 341 + ] 342 + 343 + [[package]] 309 344 name = "packaging" 310 345 version = "25.0" 311 346 source = { registry = "https://pypi.org/simple" } ··· 501 536 { name = "fastapi" }, 502 537 { name = "huggingface-hub" }, 503 538 { name = "mlx-lm" }, 539 + { name = "openai-harmony" }, 540 + { name = "openresponses-types" }, 504 541 { name = "uvicorn" }, 505 542 ] 506 543 ··· 510 547 { name = "fastapi" }, 511 548 { name = "huggingface-hub", specifier = ">=0.34.0" }, 512 549 { name = "mlx-lm" }, 550 + { name = "openai-harmony", specifier = 
"==0.0.8" }, 551 + { name = "openresponses-types" }, 513 552 { name = "uvicorn" }, 514 553 ] 515 554
+1 -1
tiles/Cargo.toml
··· 1 1 [package] 2 2 name = "tiles" 3 - version = "0.4.1" 3 + version = "0.4.2" 4 4 edition = "2024" 5 5 6 6 [dependencies]
+8 -12
tiles/src/commands/mod.rs
··· 20 20 21 21 use crate::{AccountArgs, AccountCommands}; 22 22 23 - const FTUE_VERSION_TITLE: &str = "Tiles v0.4.1"; 23 + const FTUE_VERSION_TITLE: &str = "Tiles"; 24 24 const FTUE_HEADER: &str = "Initializing local account..."; 25 25 const FTUE_ASCII_ART: &str = r#" 26 26 ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ ··· 41 41 ▓▓▓▓▓▓▓▓ 42 42 "#; 43 43 const FTUE_REASSURANCE_LOCAL: &str = "On-device by default."; 44 - const FTUE_REASSURANCE_NO_CLOUD: &str = "Online models and identity optional."; 44 + // const FTUE_REASSURANCE_NO_CLOUD: &str = "Online models and identity optional."; 45 45 const FTUE_NICKNAME_PROMPT: &str = "Choose a username:"; 46 46 const FTUE_NICKNAME_REQUIRED: &str = "Username is required. Please enter a username:"; 47 47 const FTUE_ACCOUNT_CREATED: &str = "✓ Account created"; ··· 52 52 const FTUE_DATA_DIR_CHANGE_HINT: &str = "Change data path later:"; 53 53 const FTUE_DATA_DIR_CHANGE_COMMAND: &str = "tiles data set-path <PATH>"; 54 54 const FTUE_CUSTOM_DATA_PROMPT: &str = "Use a custom data directory now? 
[y/N]"; 55 + const FTUE_UPDATE_COMMAND: &str = "tiles update"; 55 56 56 57 pub fn run_setup_for_ftue(run_args: &RunArgs) -> Result<()> { 57 58 // initializes config directory ··· 62 63 let root_config = get_or_create_config()?; 63 64 let root_user_details = get_root_user_details(&root_config)?; 64 65 println!("{}", FTUE_ASCII_ART.blue()); 65 - println!("{}", FTUE_VERSION_TITLE); 66 + println!("{} {}", FTUE_VERSION_TITLE, env!("CARGO_PKG_VERSION")); 66 67 println!(); 67 68 68 69 if root_user_details.id.is_empty() { 69 70 println!("{}", FTUE_HEADER); 70 71 println!(); 71 72 println!("{}", FTUE_REASSURANCE_LOCAL); 72 - println!("{}", FTUE_REASSURANCE_NO_CLOUD); 73 73 println!(); 74 74 // FTUE 75 75 setup_root_account(root_config.clone())?; ··· 236 236 ); 237 237 238 238 println!("{}", update_str.yellow()); 239 - println!("You can always update via `tiles update` later\n"); 239 + println!("You can always update Tiles later via:"); 240 + println!(" {}\n", FTUE_UPDATE_COMMAND.bright_blue().bold()); 241 + println!("{}", "Do you want to update now? (Y/n)".to_string().green()); 240 242 241 - println!("{}", "Do you want to update now? (Y/N)".to_string().green()); 242 243 let stdin = io::stdin(); 243 244 let mut input = String::new(); 244 245 stdin.read_line(&mut input)?; ··· 341 342 342 343 #[test] 343 344 fn ftue_copy_matches_expected_constants() { 344 - assert_eq!(FTUE_VERSION_TITLE, "Tiles v0.4.1"); 345 345 assert_eq!(FTUE_HEADER, "Initializing local account..."); 346 346 assert_eq!(FTUE_REASSURANCE_LOCAL, "On-device by default."); 347 - assert_eq!( 348 - FTUE_REASSURANCE_NO_CLOUD, 349 - "Online models and identity optional." 
350 - ); 351 347 assert_eq!(FTUE_NICKNAME_PROMPT, "Choose a username:"); 352 348 assert_eq!(FTUE_ACCOUNT_LABEL, "Account"); 353 349 assert_eq!(FTUE_ACCOUNT_DETAILS_HINT, "View full details:"); ··· 355 351 assert_eq!(FTUE_DATA_DIR_CHANGE_HINT, "Change data path later:"); 356 352 assert_eq!( 357 353 FTUE_CUSTOM_DATA_PROMPT, 358 - "Use a custom data directory now? [y/N]" 354 + "Use a custom data directory now? [Y/N]" 359 355 ); 360 356 } 361 357
+1 -3
tiles/src/main.rs
··· 132 132 }; 133 133 commands::run_setup_for_ftue(&run_args) 134 134 .inspect_err(|e| eprintln!("Failed to setup Tiles due to {:?}", e))?; 135 - commands::try_app_update() 136 - .await 137 - .inspect_err(|e| eprintln!("Failed to update the app due to {:?}", e))?; 135 + let _ = commands::try_app_update().await; 138 136 commands::run(&runtime, run_args).await; 139 137 } 140 138 Some(Commands::Run {
+67 -21
tiles/src/runtime/mlx.rs
··· 1 1 use crate::runtime::RunArgs; 2 2 use crate::utils::config::{ConfigProvider, DefaultProvider, get_memory_path}; 3 3 use crate::utils::hf_model_downloader::*; 4 - use anyhow::{Context, Result}; 4 + use anyhow::{Context, Result, anyhow}; 5 5 use futures_util::StreamExt; 6 6 use owo_colors::OwoColorize; 7 7 use reqwest::{Client, StatusCode}; ··· 47 47 // think: String, 48 48 reply: String, 49 49 code: String, 50 + prev_response_id: String, 50 51 metrics: Option<BenchmarkMetrics>, 51 52 } 52 53 ··· 245 246 let mut editor = Editor::<TilesHinter, DefaultHistory>::with_config(config).unwrap(); 246 247 editor.set_helper(Some(TilesHinter)); 247 248 let mut g_reply: String = "".to_owned(); 249 + let mut prev_response_id: String = String::from(""); 250 + 248 251 loop { 249 252 let readline = editor.readline(">>> "); 250 253 let input = match readline { ··· 292 295 &python_code, 293 296 &g_reply, 294 297 run_args, 298 + &prev_response_id, 295 299 ) 296 300 .await 297 301 { ··· 308 312 if run_args.memory { 309 313 println!("\n{}", response.reply.trim()); 310 314 } else { 315 + prev_response_id = response.prev_response_id; 311 316 println!("\n"); 312 317 } 313 318 // Display benchmark metrics if available ··· 345 350 } 346 351 } 347 352 348 - pub async fn ping() -> Result<(), String> { 353 + pub async fn ping() -> Result<()> { 349 354 let client = Client::new(); 350 355 let res = client.get("http://127.0.0.1:6969/ping").send().await; 351 356 352 357 match res { 353 - Err(_) => Err(String::from("Server is down")), 358 + Err(err) => Err(anyhow!("Server down due to {:?}", err)), 354 359 _ => Ok(()), 355 360 } 356 361 } ··· 365 370 let body = json!({ 366 371 "model": model_name, 367 372 "memory_path": memory_path, 368 - "system_prompt": modelfile.system.clone().unwrap_or(default_modelfile.system.clone().unwrap()) 373 + "system_prompt": modelfile.system.clone().unwrap_or(default_modelfile.system.clone().unwrap_or("".to_owned())) 369 374 }); 370 375 371 376 let res = client ··· 
399 404 python_code: &str, 400 405 g_reply: &str, 401 406 run_args: &RunArgs, 402 - ) -> Result<ChatResponse, String> { 407 + prev_response_id: &str, 408 + ) -> Result<ChatResponse> { 403 409 let client = Client::new(); 404 - 405 410 let body = json!({ 406 411 "model": model_name, 412 + "input": [{ 413 + "type": "message", 414 + "role": "user", 415 + "content": input 416 + }, 417 + { 418 + "type": "message", 419 + "role": "developer", 420 + "content": "" 421 + }], 422 + "reasoning": {"effort": "medium"}, 423 + "chat_start": chat_start, 424 + "stream": true, 425 + "previous_response_id": prev_response_id, 426 + "python_code": python_code, 427 + "messages": [{"role": "assistant", "content": g_reply}, {"role": "user", "content": input}] 428 + }); 429 + 430 + let memory_body = json!({ 431 + "model": model_name, 407 432 "input": input, 408 433 "chat_start": chat_start, 409 434 "stream": true, 410 435 "python_code": python_code, 411 436 "messages": [{"role": "assistant", "content": g_reply}, {"role": "user", "content": input}] 437 + 412 438 }); 413 - let api_url = if run_args.memory { 414 - "http://127.0.0.1:6969/v1/chat/completions" 439 + let res = if run_args.memory { 440 + let api_url = "http://127.0.0.1:6969/v1/chat/completions"; 441 + client.post(api_url).json(&memory_body).send().await? 415 442 } else { 416 - "http://127.0.0.1:6969/v1/responses" 443 + let api_url = "http://127.0.0.1:6969/v1/responses"; 444 + client.post(api_url).json(&body).send().await? 
417 445 }; 418 - let res = client.post(api_url).json(&body).send().await.unwrap(); 419 446 420 447 let mut stream = res.bytes_stream(); 421 448 let mut accumulated = String::new(); 422 449 println!(); 423 450 let mut metrics: Option<BenchmarkMetrics> = None; 424 451 let mut is_answer_start = false; 452 + let mut prev_response_id: String = String::from(""); 453 + let mut output_completed: bool = false; 425 454 while let Some(chunk) = stream.next().await { 426 - let chunk = chunk.unwrap(); 455 + let chunk = chunk?; 427 456 let s = String::from_utf8_lossy(&chunk); 428 457 for line in s.lines() { 429 458 if !line.starts_with("data: ") { ··· 436 465 return Ok(convert_to_chat_response( 437 466 &accumulated, 438 467 run_args.memory, 468 + prev_response_id, 439 469 metrics, 440 470 )); 441 471 } 442 472 443 - // Parse JSON 473 + //TODO: This will break if we ask the model to give an essay and all 444 474 let v: Value = serde_json::from_str(data).unwrap(); 445 475 // Check for metrics in the response 446 476 if let Some(metrics_obj) = v.get("metrics") { ··· 449 479 let model_text: Option<&str> = if run_args.memory { 450 480 v["choices"][0]["delta"]["content"].as_str() 451 481 } else { 482 + prev_response_id = serde_json::to_string(&v["id"])?; 483 + // println!("prev_id {}", prev_response_id); 484 + if serde_json::to_string(&v["status"])?.contains("completed") { 485 + output_completed = true; 486 + } 487 + 452 488 v["output"][0]["content"][0]["text"].as_str() 453 489 }; 454 490 455 491 if let Some(delta) = model_text { 456 - accumulated.push_str(delta); 457 - if !run_args.memory && delta.contains("**[Answer]**") { 458 - is_answer_start = true; 459 - } 460 - if !is_answer_start { 461 - print!("{}", delta.dimmed()); 492 + if !run_args.memory { 493 + // TODO: This doesn't support non-harmonic models, so need to handle it 494 + if delta.contains("**[Answer]**") { 495 + is_answer_start = true 496 + } 497 + if !output_completed { 498 + accumulated.push_str(delta); 499 + if 
!is_answer_start { 500 + print!("{}", delta.dimmed()); 501 + } else { 502 + print!("{}", delta); 503 + }; 504 + } 462 505 } else { 463 - print!("{}", delta); 506 + accumulated.push_str(delta); 464 507 } 465 508 use std::io::Write; 466 509 std::io::stdout().flush().ok(); 467 510 } 468 511 } 469 512 } 470 - Err(String::from("request failed")) 513 + 514 + Err(anyhow!("Result failed")) 471 515 } 472 516 473 517 fn convert_to_chat_response( 474 518 content: &str, 475 519 memory_mode: bool, 520 + prev_response_id: String, 476 521 metrics: Option<BenchmarkMetrics>, 477 522 ) -> ChatResponse { 478 523 ChatResponse { 479 524 reply: extract_reply(content, memory_mode), 480 525 code: extract_python(content), 526 + prev_response_id, 481 527 metrics, 482 528 } 483 529 } ··· 511 557 Ok(()) => { 512 558 break; 513 559 } 514 - Err(_) => { 560 + Err(_err) => { 515 561 println!("tiling..."); 516 562 sleep(Duration::from_secs(5)).await; 517 563 }
+5 -6
tiles/src/utils/installer.rs
··· 11 11 12 12 use anyhow::{Result, anyhow}; 13 13 use reqwest::{Client, header::HeaderMap}; 14 - use semver::{Version, VersionReq}; 14 + use semver::Version; 15 15 use serde::Deserialize; 16 16 17 17 const RELEASES_BASE_ENDPOINT: &str = "https://api.github.com"; ··· 64 64 65 65 pub async fn get_update_info() -> Result<UpdateInfo> { 66 66 let latest_vsn = get_latest_version(RELEASES_BASE_ENDPOINT).await?; 67 - 68 - let req_vsn = VersionReq::parse(&latest_vsn)?; 67 + let req_vsn = Version::parse(&latest_vsn)?; 69 68 let current_vsn = Version::parse(env!("CARGO_PKG_VERSION")) 70 69 .map_err(|e| anyhow!("Failed to parse pkg version due to {}", e))?; 71 70 72 - if req_vsn.matches(&current_vsn) { 71 + if req_vsn.cmp_precedence(&current_vsn).is_gt() { 73 72 Ok(UpdateInfo { 74 - can_update: false, 73 + can_update: true, 75 74 latest_version: req_vsn.to_string(), 76 75 current_version: current_vsn.to_string(), 77 76 }) 78 77 } else { 79 78 Ok(UpdateInfo { 80 - can_update: true, 79 + can_update: false, 81 80 latest_version: req_vsn.to_string(), 82 81 current_version: current_vsn.to_string(), 83 82 })