Merge pull request #92 from tilesprivacy/feat/harmony-renderer

+64

CHANGELOG.md

··· 1 + # Changelog 2 + 3 + All notable changes to this project are documented in this file. 4 + The format is based on https://keepachangelog.com/en/1.1.0/ 5 + 6 + ## [Unreleased] 7 + 8 + ## [0.4.2] - 2026-03-01 9 + ### Added 10 + - Added FTUE changes for account setup in https://github.com/tilesprivacy/tiles/pull/88 11 + - Added OTA updater in https://github.com/tilesprivacy/tiles/pull/89 12 + - Supports auto update checking and installing 13 + - Use `tiles update` for updating Tiles CLI manually 14 + 15 + ### Changed 16 + - Integrated Harmony renderer for gpt-oss model in https://github.com/tilesprivacy/tiles/pull/92 17 + 18 + ### Fixed 19 + - Added path unavailability warning during installation in https://github.com/tilesprivacy/tiles/pull/90 20 + - coverage patch-1 in https://github.com/tilesprivacy/tiles/pull/91 21 + 22 + ## [0.4.1] - 2026-02-22 23 + ### Added 24 + - Identity system for Tiles: 25 + - `tiles account` to show account details 26 + - `tiles account create <nickname>` to create root identity and optional nickname 27 + - `tiles account set-nickname` to set a nickname for root identity 28 + - Updated CLI to include default `tiles` command 29 + 30 + ## [0.4.0] - 2026-02-04 31 + ### Added 32 + - Portable Python runtime in the installer (no system Python required) 33 + - Bundled default Modelfiles and direct reading of system prompt from Modelfile 34 + - Support for `gpt-oss-20b` in interactive chat 35 + - Basic support for the Open Responses API (`/v1/responses`) and REST endpoints 36 + - Token metrics for model responses in the REPL 37 + - `-m` flag for `tiles run` to execute Tiles in memory mode (experimental) 38 + - Tilekit 0.2.0: `optimize` subcommand for automatic system-prompt optimization via DSRs 39 + 40 + ## [0.3.1] - 2026-01-09 41 + ### Added 42 + - `--relay-count` / `-r` option for `tiles run` (helps if model gets stuck) 43 + - CLI shows progress status while downloading models 44 + - Slash commands and placeholder hint in the REPL 
45 + - Ability to set custom memory location via `tiles memory set-path <PATH>` 46 + 47 + ### Changed 48 + - Minor internal refactoring 49 + 50 + ## [0.3.0] - 2026-01-06 51 + ### Fixed 52 + - Tiles binary startup issue when run from outside a project directory 53 + - Model not unloading after exiting the REPL 54 + - Updated Python version to 3.13 for development 55 + - Enabled basic Linux compatibility 56 + 57 + ### Changed 58 + - Basic refactoring to support multiple inference runtimes 59 + 60 + ## [0.2.0] - 2025-12-20 61 + ### Added 62 + - Server commands 63 + - Streaming support with “thinking tokens” in the CLI 64 + - Auto-downloading of model specified in Modelfile

+1 -1

Cargo.lock

··· 4168 4168 4169 4169 [[package]] 4170 4170 name = "tiles" 4171 - version = "0.4.1" 4171 + version = "0.4.2" 4172 4172 dependencies = [ 4173 4173 "anyhow", 4174 4174 "async-std",

-39

modelfiles/gpt-oss

··· 1 1 FROM mlx-community/gpt-oss-20b-MXFP4-Q4 2 - SYSTEM """ 3 - You are Tiles, a helpful AI assistant. You have access to a secure Python sandbox for running code and managing your memory. 4 - 5 - ## CRITICAL: Output Format 6 - Your output must be structured into three distinct channels using these exact markers: 7 - 8 - 1. **Analysis Channel**: Thinking and planning. 9 - - Start: `<|channel|>analysis<|message|>` 10 - - End: `<|end|>` 11 - 12 - 2. **Code Channel**: Python code to execute. 13 - - Start: `<|channel|>code<|message|>` 14 - - End: `<|end|>` 15 - 16 - 3. **Final Response Channel**: Your final answer to the user. 17 - - Start: `<|channel|>final<|message|>` 18 - - End: `<|end|>` 19 - 20 - **Rules**: 21 - - ALWAYS start with the Analysis channel. 22 - - If you need to run code, use the Code channel. 23 - - If no code is needed, use the Final Response channel after Analysis. 24 - - **CRITICAL: ALWAYS assign function results and calculations to variables.** 25 - ```python 26 - # CORRECT 27 - result = math.sqrt(12345) 28 - # WRONG - The result will be LOST 29 - math.sqrt(12345) 30 - ``` 31 - - NEVER mention "ChatGPT" or "OpenAI". You are Tiles. 32 - - NEVER use legacy tags like `<think>`, `<python>`, or `<reply>`. Use ONLY the channel markers above. 33 - 34 - ### Handling Results 35 - When you receive a `<result>` block, it indicates the outcome of your code. 36 - - Analyze the result in the **Analysis** channel. 37 - - If the calculation is complete, provide the final answer in the **Final Response** channel immediately. 38 - - **DO NOT** repeat the code once you have the results unless you need to fix a specific error. 39 - - **DO NOT** ask the user if you should run code; just run it if needed using the Code channel. 40 - """

+1 -1

scripts/bundler.sh

··· 16 16 17 17 cargo build -p tiles --${TARGET} 18 18 19 - rm -rf "${DIST_DIR}" 19 + # rm -rf "${DIST_DIR}" 20 20 21 21 mkdir -p "${DIST_DIR}/tmp" 22 22 cp "target/${TARGET}/${BINARY_NAME}" "${DIST_DIR}/tmp/"

+1 -1

scripts/install.sh

··· 4 4 ENV="prod" # prod is another env, try taking it from github env 5 5 REPO="tilesprivacy/tiles" 6 6 # VERSION="${TILES_VERSION:-latest}" 7 - VERSION="0.4.1" 7 + VERSION="0.4.2" 8 8 INSTALL_DIR="$HOME/.local/bin" # CLI install location 9 9 SERVER_DIR="$HOME/.local/lib/tiles/server" # Python server folder 10 10 MODELFILE_DIR="$HOME/.local/lib/tiles/modelfiles" # Modelfiles folder

+10 -16

server/api.py

··· 1 - from fastapi import FastAPI, HTTPException 2 - 3 - from .schemas import ( 4 - ChatMessage, 5 - ChatCompletionRequest, 6 - StartRequest, 7 - downloadRequest, 8 - ResponsesRequest, 9 - ) 10 1 import logging 11 2 import sys 12 3 from typing import Optional 13 4 5 + from fastapi import FastAPI, HTTPException 14 6 from fastapi.responses import StreamingResponse 15 7 from pydantic import BaseModel, Field 16 8 9 + from . import runtime 17 10 from .hf_downloader import pull_model 18 - 11 + from .mem_agent.engine import execute_sandboxed_code 19 12 from .mem_agent.utils import ( 20 13 create_memory_if_not_exists, 21 14 format_results, 22 15 ) 23 - from .mem_agent.engine import execute_sandboxed_code 24 - 25 - from . import runtime 16 + from .schemas import ( 17 + ChatCompletionRequest, 18 + ChatMessage, 19 + ResponsesRequest, 20 + StartRequest, 21 + downloadRequest, 22 + ) 26 23 27 24 logger = logging.getLogger("app") 28 25 _current_model_path: Optional[str] = None ··· 92 89 Create a response with openResponse format 93 90 """ 94 91 95 - global _messages 96 - 97 92 if request.stream: 98 - # Streaming response 99 93 return StreamingResponse( 100 94 runtime.backend.generate_response_chat_stream(request), 101 95 media_type="text/plain",

+196 -70

server/backend/mlx.py

··· 1 - from .mlx_runner import MLXRunner 2 - from ..cache_utils import get_model_path 1 + import json 2 + import logging 3 + import time 4 + import uuid 5 + from collections.abc import AsyncGenerator 6 + 3 7 from fastapi import HTTPException 8 + from openai_harmony import ( 9 + Conversation, 10 + DeveloperContent, 11 + Message, 12 + ReasoningEffort, 13 + Role, 14 + SystemContent, 15 + ) 16 + from openresponses_types import ReasoningEffortEnum 17 + from openresponses_types.types import ( 18 + DeveloperMessageItemParam, 19 + Error, 20 + IncompleteDetails, 21 + UserMessageItemParam, 22 + ) 23 + 24 + from ..cache_utils import get_model_path 25 + from ..hf_downloader import pull_model 4 26 from ..schemas import ( 27 + ChatCompletionRequest, 5 28 ChatMessage, 6 - ChatCompletionRequest, 29 + GenerationMetrics, 30 + ResponsesRequest, 7 31 ResponsesResponse, 8 32 downloadRequest, 9 - GenerationMetrics, 10 - ResponsesRequest, 11 33 ) 12 - from ..hf_downloader import pull_model 13 - 14 - import logging 15 - import json 16 - import time 17 - import uuid 18 - from collections.abc import AsyncGenerator 34 + from .mlx_runner import MLXRunner 19 35 20 36 logger = logging.getLogger("app") 21 37 ··· 36 52 raise HTTPException(status_code=400, detail="Downloading model failed") 37 53 38 54 39 - def get_or_load_model(model_spec: str, verbose: bool = False) -> MLXRunner: 55 + def get_or_load_model(model_spec: str, verbose: bool = True) -> MLXRunner: 40 56 """Get model from cache or load it if not cached.""" 41 57 global _model_cache, _current_model_path 42 58 ··· 204 220 205 221 def _prepend_previous_response(user_input: str, prev_id: Optional[str]) -> str: 206 222 """If prev_id points to a stored response, prepend its output text as context.""" 223 + 207 224 if not prev_id: 208 225 return user_input 209 - prev = _responses.get(prev_id) 226 + 227 + prev_id = json.loads(prev_id) 228 + 229 + prev = _responses.get(prev_id) # pyright: ignore 230 + 210 231 if not prev or not getattr(prev, 
"output", None): 211 232 return user_input 212 233 prev_text_parts: List[str] = [] ··· 219 240 return user_input 220 241 221 242 222 - def _calc_usage(runner: MLXRunner, input_text: str, generated_text: str) -> Dict[str, int]: 243 + def _calc_usage( 244 + runner: MLXRunner, input_text: str, generated_text: str 245 + ) -> Dict[str, int]: 223 246 """Calculate token usage using the runner tokenizer; fall back to zeros on error.""" 224 247 try: 225 248 input_tokens = len(runner.tokenizer.encode(input_text)) ··· 237 260 status: str, 238 261 output: List[Dict[str, Any]], 239 262 usage: Dict[str, int], 263 + error: Error | None = None, 264 + incomplete_details: IncompleteDetails | None = None, 240 265 metrics: Optional[Dict[str, Any]] = None, 241 - error: Optional[Dict[str, Any]] = None, 242 266 ) -> ResponsesResponse: 243 267 """Create a ResponsesResponse, attach metrics to metadata and store it in `_responses`.""" 244 268 resp = ResponsesResponse( ··· 251 275 error=error, 252 276 output=output, 253 277 usage=usage, 278 + incomplete_details=incomplete_details, 254 279 ) 255 280 if metrics: 256 281 try: ··· 269 294 return int(len(text.split()) * 1.3) # Approximation, convert to int 270 295 271 296 297 + def handle_response_input(request: ResponsesRequest): 298 + dev_msg_item = None 299 + user_msg_item = None 300 + user_input_content = "" 301 + if isinstance(request.input, str): 302 + user_input_content = request.input 303 + else: 304 + for item in request.input: 305 + match item: 306 + case UserMessageItemParam(): 307 + user_msg_item = item 308 + user_input_content = item.content.root # pyright: ignore 309 + case DeveloperMessageItemParam(): 310 + dev_msg_item = item 311 + case _: 312 + raise TypeError("unknown type") 313 + return [user_input_content, user_msg_item, dev_msg_item] 314 + 315 + 272 316 async def generate_response_chat_stream( 273 - request: ResponsesRequest 317 + request: ResponsesRequest, 274 318 ) -> AsyncGenerator[str, None]: 275 319 """Generate streaming 
chat responses for Responses API.""" 276 - 277 - model = request.model or "mlx-community/gpt-oss-20b-MXFP4-Q4" 278 - user_input = request.input or "" 279 - response_id = f"resp-{uuid.uuid4()}" 280 - msg_id = f"msg_{uuid.uuid4()}" 320 + model = request.model 281 321 created = int(time.time()) 282 322 runner = get_or_load_model(model) 283 323 metrics = None 284 - # If a previous_response_id is provided, prepend its text to the prompt 285 - prev_id = getattr(request, "previous_response_id", None) 286 - user_input = _prepend_previous_response(user_input, prev_id) 287 324 288 - # Calculate input tokens once 289 - input_tokens = len(runner.tokenizer.encode(user_input)) 325 + user_input_content = "" 326 + 327 + dev_msg_item = None 328 + user_msg_item = None 329 + [user_input_content, user_msg_item, dev_msg_item] = handle_response_input(request) 330 + user_input_content = _prepend_previous_response( 331 + user_input_content, request.previous_response_id 332 + ) 333 + 334 + reasoning_effort = get_reasoning_effort(request.reasoning.effort) 335 + 336 + convo = build_harmony_conversation( 337 + reasoning_effort, dev_msg_item, user_input_content 338 + ) 339 + 340 + input_tokens = len(runner.tokenizer.encode(user_input_content)) # pyright: ignore 290 341 291 342 # Initial chunk 292 343 initial_chunk = { 293 - "id": response_id, 344 + "id": f"resp_{uuid.uuid4()}", 294 345 "object": "response.chunk", 295 346 "created_at": created, 296 347 "model": model, ··· 298 349 "output": [ 299 350 { 300 351 "type": "message", 301 - "id": msg_id, 352 + "id": f"msg_{uuid.uuid4()}", 302 353 "status": "in_progress", 303 354 "role": "assistant", 304 355 "content": [], ··· 307 358 "usage": {"input_tokens": input_tokens, "output_tokens": 0}, 308 359 } 309 360 yield f"data: {json.dumps(initial_chunk)}\n\n" 310 - 311 - # Stream tokens 361 + 312 362 accumulated_text = "" 363 + answer_text = "" 313 364 output_tokens = 0 365 + error = None 366 + incomplete_details = None 367 + has_answer_started: bool = 
False 368 + # TODO: we need to inject the context prepending, else model is losing it. 314 369 try: 315 - for token in runner.generate_streaming( 316 - prompt=user_input, 370 + for token in runner.generate_streaming_gpt( 371 + conversation=convo, 317 372 max_tokens=runner.get_effective_max_tokens(request.max_output_tokens), 318 373 temperature=request.temperature or 1, 319 374 top_p=request.top_p or 1, 320 - use_chat_template=True, 321 375 ): 322 376 if isinstance(token, GenerationMetrics): 323 377 metrics = token 324 378 continue 325 - 379 + 380 + if not isinstance(token, str): 381 + continue 382 + 383 + if "**[Answer]**" in token or has_answer_started: 384 + has_answer_started = True 385 + answer_text += token 386 + 326 387 accumulated_text += token 327 388 output_tokens += 1 # Each yield is one token 328 - 389 + 329 390 chunk = { 330 - "id": response_id, 391 + "id": f"resp_{uuid.uuid4()}", 331 392 "object": "response.chunk", 332 393 "created_at": created, 333 394 "model": model, ··· 335 396 "output": [ 336 397 { 337 398 "type": "message", 338 - "id": msg_id, 399 + "id": f"msg_{uuid.uuid4()}", 339 400 "status": "in_progress", 340 401 "role": "assistant", 341 402 "content": [ ··· 350 411 "usage": {"input_tokens": input_tokens, "output_tokens": output_tokens}, 351 412 } 352 413 yield f"data: {json.dumps(chunk)}\n\n" 353 - 414 + 354 415 except Exception as e: 416 + error = {"message": str(e), "code": "500"} 417 + incomplete_details = {"reason": "internal server error"} 418 + 355 419 error_chunk = { 356 - "id": response_id, 420 + "id": f"resp_{uuid.uuid4()}", 357 421 "object": "response.chunk", 358 422 "created_at": created, 359 423 "model": model, 360 424 "status": "failed", 361 - "error": {"message": str(e), "type": "internal_error"}, 425 + "error": error, 426 + "incomplete_details": incomplete_details, 362 427 "output": [], 363 428 "usage": {"input_tokens": input_tokens, "output_tokens": output_tokens}, 364 429 } 365 430 yield f"data: 
{json.dumps(error_chunk)}\n\n" 366 431 return 367 - 432 + 368 433 # Final chunk 369 434 completed_at = int(time.time()) 370 435 # Build final chunk with accumulated text and store response for follow-ups 436 + 371 437 final_chunk = { 372 - "id": response_id, 438 + "id": f"resp_{uuid.uuid4()}", 373 439 "object": "response.chunk", 374 440 "created_at": created, 375 441 "completed_at": completed_at, ··· 378 444 "output": [ 379 445 { 380 446 "type": "message", 381 - "id": msg_id, 447 + "id": f"msg_{uuid.uuid4()}", 382 448 "status": "completed", 383 449 "role": "assistant", 384 450 "content": [ 385 451 { 386 452 "type": "output_text", 387 - "text": "", 453 + "text": answer_text, 388 454 "annotations": [], 389 455 } 390 456 ], ··· 392 458 ], 393 459 "usage": {"input_tokens": input_tokens, "output_tokens": output_tokens}, 394 460 } 461 + 395 462 # Store and return a typed ResponsesResponse for follow-ups 396 463 metrics_obj = None 397 464 if metrics: ··· 404 471 final_chunk["metrics"] = metrics_obj 405 472 406 473 _store_response( 407 - response_id=response_id, 474 + response_id=final_chunk["id"], 408 475 created=created, 409 476 completed_at=completed_at, 410 477 model=model, ··· 418 485 419 486 420 487 async def generate_response_chat(request: ResponsesRequest): 421 - """Generate chat responses""" 488 + """Generate chat responses for Responses API""" 422 489 423 - model = request.model or "mlx-community/gpt-oss-20b-MXFP4-Q4" 424 - user_input = request.input or "" 490 + model = request.model 425 491 response_id = f"resp-{uuid.uuid4()}" 426 492 msg_id = f"msg_{uuid.uuid4()}" 427 493 created = int(time.time()) 428 494 runner = get_or_load_model(model) 429 495 430 - # If a previous_response_id is provided, prepend its text to the prompt 431 - prev_id = getattr(request, "previous_response_id", None) 432 - user_input = _prepend_previous_response(user_input, prev_id) 496 + user_input_content = "" 497 + 498 + dev_msg_item = None 499 + user_msg_item = None 500 + 
[user_input_content, user_msg_item, dev_msg_item] = handle_response_input(request) 501 + user_input_content = _prepend_previous_response( 502 + user_input_content, request.previous_response_id 503 + ) 504 + 505 + reasoning_effort = get_reasoning_effort(request.reasoning.effort) 506 + 507 + convo = build_harmony_conversation( 508 + reasoning_effort, dev_msg_item, user_input_content 509 + ) 433 510 434 511 metrics_obj = None 512 + error = None 513 + incomplete_details = None 514 + 435 515 try: 436 516 start_time = time.time() 437 - generated_text = runner.generate_batch( 438 - prompt=user_input, 517 + generated_text = runner.generate_batch_gpt( 518 + conversation=convo, 439 519 max_tokens=runner.get_effective_max_tokens(request.max_output_tokens), 440 520 temperature=request.temperature or 1, 441 521 top_p=request.top_p or 1, ··· 448 528 completed_at = int(time.time()) 449 529 status = "completed" 450 530 error = None 451 - 531 + incomplete_details = None 452 532 # Calculate token usage 453 - usage = _calc_usage(runner, user_input, generated_text) 533 + usage = _calc_usage(runner, user_input_content, generated_text) 454 534 output_tokens = usage.get("output_tokens", 0) 455 535 metrics_obj = { 456 536 "ttft_ms": generation_time * 1000.0, 457 537 "total_tokens": output_tokens, 458 - "tokens_per_second": (output_tokens / generation_time) if generation_time > 0 else 0.0, 538 + "tokens_per_second": (output_tokens / generation_time) 539 + if generation_time > 0 540 + else 0.0, 459 541 "total_latency_s": generation_time, 460 542 } 461 543 462 544 except Exception as e: 463 545 completed_at = None 464 546 status = "failed" 465 - error = {"message": str(e), "type": "internal_error"} 547 + error = {"message": str(e), "code": "500"} 548 + incomplete_details = {"reason": "internal server error"} 466 549 generated_text = "" 467 550 usage = {"input_tokens": 0, "output_tokens": 0} 468 551 469 - output_block = [ 470 - { 471 - "type": "message", 472 - "id": msg_id, 473 - "status": 
"completed" if status == "completed" else "failed", 474 - "role": "assistant", 475 - "content": [ 476 - {"type": "output_text", "text": generated_text, "annotations": []} 477 - ], 478 - } 479 - ] if status == "completed" else [] 552 + output_block = ( 553 + [ 554 + { 555 + "type": "message", 556 + "id": msg_id, 557 + "status": "completed" if status == "completed" else "failed", 558 + "role": "assistant", 559 + "content": [ 560 + {"type": "output_text", "text": generated_text, "annotations": []} 561 + ], 562 + } 563 + ] 564 + if status == "completed" 565 + else [] 566 + ) 480 567 481 568 resp = _store_response( 482 569 response_id=response_id, ··· 486 573 status=status, 487 574 output=output_block, 488 575 usage=usage, 489 - metrics=(metrics_obj if status == "completed" else None), 490 576 error=error, 577 + incomplete_details=incomplete_details, 578 + metrics=(metrics_obj if status == "completed" else None), 491 579 ) 492 580 493 581 return resp 582 + 583 + 584 + def get_reasoning_effort(reasoning_effort_enum: ReasoningEffortEnum | None): 585 + reasoning_effort: ReasoningEffort 586 + match reasoning_effort_enum: 587 + case ReasoningEffortEnum.high: 588 + reasoning_effort = ReasoningEffort.HIGH 589 + case ReasoningEffortEnum.medium: 590 + reasoning_effort = ReasoningEffort.MEDIUM 591 + case ReasoningEffortEnum.low: 592 + reasoning_effort = ReasoningEffort.LOW 593 + case ReasoningEffortEnum.xhigh: 594 + reasoning_effort = ReasoningEffort.HIGH 595 + case _: 596 + raise TypeError("unknow reasoing effort") 597 + return reasoning_effort 598 + 599 + 600 + def build_harmony_conversation( 601 + reasoning_effort: ReasoningEffort, 602 + dev_msg_item: DeveloperMessageItemParam | None, 603 + user_input: str, 604 + ): 605 + system_message = SystemContent.new().with_reasoning_effort(reasoning_effort) 606 + dev_message: DeveloperContent = DeveloperContent.new() 607 + if isinstance(dev_msg_item, DeveloperMessageItemParam): 608 + dev_message = 
DeveloperContent.new().with_instructions( 609 + dev_msg_item.content.root 610 + ) # pyright: ignore 611 + 612 + convo = Conversation.from_messages( 613 + [ 614 + Message.from_role_and_content(Role.SYSTEM, system_message), 615 + Message.from_role_and_content(Role.DEVELOPER, dev_message), 616 + Message.from_role_and_content(Role.USER, user_input), 617 + ] 618 + ) 619 + return convo

+205 -3

server/backend/mlx_runner.py

··· 7 7 import os 8 8 import sys 9 9 import time 10 + from ast import Yield 10 11 from collections.abc import Iterator 11 12 from pathlib import Path 12 13 from typing import Dict, Optional 14 + 15 + from mlx_lm.tokenizer_utils import TokenizerWrapper 13 16 14 17 if sys.platform == "darwin": 15 18 import mlx.core as mx ··· 18 21 from mlx_lm import load 19 22 from mlx_lm.generate import generate_step 20 23 from mlx_lm.sample_utils import make_repetition_penalty, make_sampler 24 + from openai_harmony import ( 25 + Conversation, 26 + HarmonyEncodingName, 27 + Message, 28 + Role, 29 + StreamableParser, 30 + SystemContent, 31 + load_harmony_encoding, 32 + ) 21 33 22 34 from ..reasoning_utils import ReasoningExtractor, StreamingReasoningParser 23 35 from ..schemas import GenerationMetrics ··· 62 74 class MLXRunner: 63 75 """Direct MLX model runner with streaming and interactive capabilities.""" 64 76 77 + model_path: Path 78 + adapter_path: str | None 79 + model: object | None 80 + tokenizer: TokenizerWrapper | None 81 + _memory_baseline: float | None 82 + _stop_tokens: list[str] | None 83 + _message_end_tokens: list[str] | None 84 + _chat_stop_tokens: list[str] | None 85 + _context_length: int | None 86 + _is_reasoning_model: bool 87 + _reasoning_start: str | None 88 + _reasoning_end: str | None 89 + _final_start: str | None 90 + verbose: bool 91 + _model_loaded: bool 92 + _context_entered: bool 93 + 65 94 def __init__( 66 - self, model_path: str, adapter_path: Optional[str] = None, verbose: bool = False 95 + self, model_path: str, adapter_path: str | None = None, verbose: bool = False 67 96 ): 68 97 """Initialize the runner with a model. 
69 98 ··· 113 142 return False # Don't suppress exceptions 114 143 115 144 def load_model(self): 145 + if mx is None: 146 + raise RuntimeError("MLX runtime not available in current runtime") 116 147 """Load the MLX model and tokenizer.""" 117 148 if self._model_loaded: 118 149 if self.verbose: ··· 132 163 133 164 try: 134 165 # Load model and tokenizer 135 - self.model, self.tokenizer = load( 166 + self.model, self.tokenizer, *_ = load( 136 167 str(self.model_path), adapter_path=self.adapter_path 137 168 ) 138 169 ··· 410 441 server_limit = self._context_length // 2 411 442 return min(requested_tokens or server_limit, server_limit) 412 443 444 + def generate_streaming_gpt( 445 + self, 446 + conversation: Conversation, 447 + max_tokens: int = 500, 448 + temperature: float = 0.7, 449 + top_p: float = 0.9, 450 + repetition_penalty: float = 1.1, 451 + repetition_context_size: int = 20, 452 + ) -> Iterator[str]: 453 + if not self.model or not self.tokenizer: 454 + raise RuntimeError("Model not loaded. 
Call load_model() first.") 455 + 456 + effective_max_tokens = self.get_effective_max_tokens(max_tokens, False) 457 + 458 + encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) 459 + effective_max_tokens = self.get_effective_max_tokens(max_tokens, False) 460 + 461 + prompt_tokens = encoding.render_conversation_for_completion( 462 + conversation, Role.ASSISTANT 463 + ) 464 + 465 + prompt_array = mx.array(prompt_tokens) # pyright: ignore 466 + 467 + start_time = time.time() 468 + tokens_generated = 0 469 + ttft = None 470 + 471 + sampler = make_sampler(temp=temperature, top_p=top_p) 472 + 473 + # Create repetition penalty processor if needed 474 + logits_processors = [] 475 + if repetition_penalty > 1.0: 476 + logits_processors.append( 477 + make_repetition_penalty(repetition_penalty, repetition_context_size) 478 + ) 479 + 480 + # Generate tokens one by one for streaming 481 + generator = generate_step( 482 + prompt=prompt_array, 483 + model=self.model, # pyright: ignore 484 + max_tokens=effective_max_tokens, 485 + sampler=sampler, 486 + logits_processors=logits_processors if logits_processors else None, 487 + ) 488 + 489 + parser = StreamableParser(encoding, Role.ASSISTANT) 490 + 491 + # Collect tokens and yield text 492 + generated_tokens = [] 493 + is_analysis = None 494 + is_final = None 495 + for token, _ in generator: 496 + token_id = token.item() if hasattr(token, "item") else token 497 + parser.process(token_id) # pyright: ignore 498 + 499 + generated_tokens.append(token_id) 500 + 501 + if is_analysis is None and parser.current_channel == "analysis": 502 + is_analysis = True 503 + yield "**[Reasoning]**\n\n" 504 + 505 + if is_final is None and parser.current_channel == "final": 506 + is_final = True 507 + yield "\n\n---\n\n**[Answer]**\n\n" 508 + 509 + if ttft is None: 510 + ttft = time.time() - start_time 511 + 512 + yield parser.last_content_delta # pyright: ignore 513 + 514 + tokens_generated += 1 515 + 516 + # Check for EOS token - don't 
yield it 517 + 518 + if token_id == self.tokenizer.eos_token_id: 519 + break 520 + 521 + # Yield metrics at the end 522 + total_latency = time.time() - start_time 523 + tokens_per_second = tokens_generated / total_latency if total_latency > 0 else 0 524 + ttft_ms = (ttft * 1000) if ttft is not None else 0 525 + metrics = GenerationMetrics( 526 + ttft_ms=ttft_ms, 527 + total_tokens=tokens_generated, 528 + tokens_per_second=tokens_per_second, 529 + total_latency_s=total_latency, 530 + ) 531 + yield metrics 532 + 533 + # Print generation statistics if verbose 534 + if self.verbose: 535 + generation_time = time.time() - start_time 536 + tokens_per_second = ( 537 + tokens_generated / generation_time if generation_time > 0 else 0 538 + ) 539 + print( 540 + f"\n\nGenerated {tokens_generated} tokens in {generation_time:.1f}s ({tokens_per_second:.1f} tokens/s)" 541 + ) 542 + 413 543 def generate_streaming( 414 544 self, 415 545 prompt: str, ··· 670 800 f"\n\nGenerated {tokens_generated} tokens in {generation_time:.1f}s ({tokens_per_second:.1f} tokens/s)" 671 801 ) 672 802 803 + def generate_batch_gpt( 804 + self, 805 + conversation: Conversation, 806 + max_tokens: int = 500, 807 + temperature: float = 0.7, 808 + top_p: float = 0.9, 809 + repetition_penalty: float = 1.0, 810 + repetition_context_size: int = 20, 811 + use_chat_template: bool = True, 812 + interactive: bool = False, 813 + ) -> str: 814 + """ 815 + Generate text in batch mode (non-streaming) but for 816 + """ 817 + 818 + if not self.model or not self.tokenizer: 819 + raise RuntimeError("Model not loaded. 
Call load_model() first.") 820 + 821 + # lets do stuff for harmoy 822 + encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) 823 + effective_max_tokens = self.get_effective_max_tokens(max_tokens, interactive) 824 + 825 + prompt_tokens = encoding.render_conversation_for_completion( 826 + conversation, Role.ASSISTANT 827 + ) 828 + 829 + prompt_array = mx.array(prompt_tokens) 830 + sampler = make_sampler(temp=temperature, top_p=top_p) 831 + logits_processors = [] 832 + 833 + # TODO: Maybe add repetition penalty 834 + generator = generate_step( 835 + prompt=prompt_array, 836 + model=self.model, 837 + max_tokens=effective_max_tokens, 838 + sampler=sampler, 839 + logits_processors=logits_processors if logits_processors else None, 840 + ) 841 + 842 + generated_tokens = [] 843 + all_tokens = [] 844 + 845 + for token, _ in generator: 846 + # Token might be an array or an int 847 + token_id = token.item() if hasattr(token, "item") else token 848 + generated_tokens.append(token_id) 849 + all_tokens.append(token_id) 850 + 851 + # Check for EOS token - don't yield it 852 + if token_id == self.tokenizer.eos_token_id: 853 + break 854 + 855 + response = encoding.parse_messages_from_completion_tokens( 856 + generated_tokens, Role.ASSISTANT 857 + ) 858 + 859 + reasoning_texts = [ 860 + msg.content[0].text for msg in response if msg.channel == "analysis" 861 + ] 862 + final_texts = [ 863 + msg.content[0].text for msg in response if msg.channel != "analysis" 864 + ] 865 + 866 + # Concatenate the lists and turn into a single string. 
867 + all_texts = reasoning_texts + final_texts 868 + combined_text = "\n\n".join(filter(None, all_texts)) 869 + 870 + # if they are 2 different fields, then 871 + 872 + return combined_text 873 + 673 874 def generate_batch( 674 875 self, 675 876 prompt: str, ··· 712 913 formatted_prompt = self.tokenizer.apply_chat_template( 713 914 messages, tokenize=False, add_generation_prompt=True 714 915 ) 916 + 715 917 else: 716 918 formatted_prompt = prompt 717 919 ··· 753 955 if token_id == self.tokenizer.eos_token_id: 754 956 break 755 957 958 + print(f"all tokens\n{all_tokens}") 756 959 # Decode all tokens together for proper spacing 757 960 full_response = self.tokenizer.decode(all_tokens) 758 961 ··· 768 971 response, use_chat_stop_tokens=False 769 972 ) 770 973 771 - # Format reasoning models output 772 974 response = self._format_reasoning_response(response) 773 975 774 976 generation_time = time.time() - start_time

+2

server/pyproject.toml

··· 9 9 "mlx-lm", 10 10 "black", 11 11 "huggingface-hub>=0.34.0", 12 + "openai-harmony==0.0.8", 13 + "openresponses-types" 12 14 ] 13 15 14 16 [build-system]

+2 -2

server/pyrightconfig.json

··· 1 1 { 2 2 "venvPath": ".", 3 - "venv": ".venv" 3 + "venv": ".venv", 4 + "typeCheckingMode": "basic", 4 5 } 5 -

+80 -33

server/schemas.py

··· 1 + from dataclasses import dataclass 2 + from enum import Enum, auto 3 + from typing import Any, Dict, List, Union, override 4 + 5 + from openresponses_types import ReasoningParam, TruncationEnum 6 + from openresponses_types.types import ( 7 + AssistantMessageItemParam, 8 + DeveloperMessageItemParam, 9 + Error, 10 + FunctionCallItemParam, 11 + FunctionCallOutputItemParam, 12 + FunctionToolParam, 13 + IncompleteDetails, 14 + ItemReferenceParam, 15 + ReasoningEffortEnum, 16 + ReasoningItemParam, 17 + StreamOptionsParam, 18 + SystemMessageItemParam, 19 + ToolChoiceParam, 20 + UserMessageItemParam, 21 + ) 1 22 from pydantic import BaseModel, Field 2 - from typing import Any, Dict, List, Optional, Union 3 - from dataclasses import dataclass 4 23 5 24 6 25 class CompletionRequest(BaseModel): 7 26 model: str 8 27 prompt: Union[str, List[str]] 9 - max_tokens: Optional[int] = None 10 - temperature: Optional[float] = 0.7 11 - top_p: Optional[float] = 0.9 12 - stream: Optional[bool] = False 13 - stop: Optional[Union[str, List[str]]] = None 14 - repetition_penalty: Optional[float] = 1.1 28 + max_tokens: int | None = None 29 + temperature: float | None = 0.7 30 + top_p: float | None = 0.9 31 + stream: bool | None = False 32 + stop: Union[str, List[str]] | None = None 33 + repetition_penalty: float | None = 1.1 15 34 16 35 17 36 class ChatMessage(BaseModel): ··· 24 43 messages: List[ChatMessage] 25 44 chat_start: bool 26 45 python_code: str 27 - max_tokens: Optional[int] = None 28 - temperature: Optional[float] = 0.7 29 - top_p: Optional[float] = 0.9 30 - stream: Optional[bool] = False 31 - stop: Optional[Union[str, List[str]]] = None 32 - repetition_penalty: Optional[float] = 1.1 46 + max_tokens: int | None = None 47 + temperature: float | None = 0.7 48 + top_p: float | None = 0.9 49 + stream: bool | None = False 50 + stop: Union[str, List[str]] | None = None 51 + repetition_penalty: float | None = 1.1 33 52 34 53 35 54 class CompletionResponse(BaseModel): ··· 55 74 
object: str = "model" 56 75 owned_by: str = "mlx-knife" 57 76 permission: List = [] 58 - context_length: Optional[int] = None 77 + context_length: int | None = None 59 78 60 79 61 80 class StartRequest(BaseModel): ··· 69 88 70 89 71 90 class ResponsesRequest(BaseModel): 72 - model: Optional[str] = None 73 - input: Optional[str] = None 74 - reasoning: Optional[Dict[str, Any]] = None 75 - previous_response_id: Optional[str] = None 76 - stream: Optional[bool] = False 77 - tools: Optional[List[Dict[str, Any]]] = None 78 - temperature: Optional[float] = 1 79 - top_p: Optional[float] = 1 80 - max_output_tokens: Optional[int] = None 91 + model: str = "mlx-community/gpt-oss-20b-MXFP4-Q4" 92 + input: ( 93 + str 94 + | list[ 95 + ItemReferenceParam 96 + | ReasoningItemParam 97 + | UserMessageItemParam 98 + | SystemMessageItemParam 99 + | DeveloperMessageItemParam 100 + | AssistantMessageItemParam 101 + | FunctionCallItemParam 102 + | FunctionCallOutputItemParam 103 + ] 104 + ) 105 + reasoning: ReasoningParam = ReasoningParam( 106 + effort=ReasoningEffortEnum.medium, summary=None 107 + ) 108 + previous_response_id: str | None = None 109 + stream: bool | None = False 110 + stream_options: StreamOptionsParam | None = None 111 + tools: list[FunctionToolParam] | None = None 112 + tool_choice: ToolChoiceParam | None = None 113 + temperature: float | None = 1 114 + top_p: float | None = 1 115 + max_output_tokens: int | None = None 116 + store: bool = False 117 + # other service tiers are default, flex, priority 118 + service_tier: str = "auto" 119 + top_logprobs: int = 0 120 + # can put in the Developer msg if none there 121 + instructions: str | None = None 122 + # auto/disabled, returns 400 on disabled 123 + truncation: TruncationEnum = TruncationEnum.disabled 124 + prompt_cache: str | None = None 125 + safety_identifier: str | None = None 126 + max_tool_calls: int | None = None 127 + background: bool = False 81 128 82 129 83 130 class ResponsesResponse(BaseModel): ··· 85 132 
object: str = "response" 86 133 created_at: int 87 134 status: str 88 - completed_at: Optional[int] = None 89 - error: Optional[Dict[str, Any]] = None 90 - incomplete_details: Optional[Dict[str, Any]] = None 91 - instructions: Optional[str] = None 92 - max_output_tokens: Optional[int] = None 135 + completed_at: int | None = None 136 + error: Error | None = None 137 + incomplete_details: IncompleteDetails | None = None 138 + instructions: str | None = None 139 + max_output_tokens: int | None = None 93 140 model: str 94 - output: List[Dict[str, Any]] 141 + output: list[Dict[str, Any]] 95 142 parallel_tool_calls: bool = True 96 - previous_response_id: Optional[str] = None 97 - reasoning: Optional[Dict[str, Any]] = Field(default_factory=dict) 143 + previous_response_id: str = "" 144 + reasoning: Dict[str, Any] | None = Field(default_factory=dict) 98 145 store: bool = True 99 146 temperature: float = 1.0 100 147 text: Dict[str, Any] = Field(default_factory=lambda: {"format": {"type": "text"}}) ··· 103 150 top_p: float = 1.0 104 151 truncation: str = "disabled" 105 152 usage: Dict[str, Any] 106 - user: Optional[str] = None 153 + user: str | None = None 107 154 metadata: Dict[str, Any] = Field(default_factory=dict) 108 155 109 156

+6 -4

server/stack/requirements/app-server/packages-app-server.txt

··· 7 7 charset-normalizer==3.4.4 8 8 click==8.3.1 9 9 fastapi==0.119.0 10 - filelock==3.24.3 10 + filelock==3.25.0 11 11 fsspec==2026.2.0 12 12 h11==0.16.0 13 - hf-xet==1.3.1 13 + hf-xet==1.3.2 14 14 huggingface-hub==0.35.0 15 15 idna==3.11 16 16 jinja2==3.1.6 ··· 18 18 mlx-lm==0.28.3 19 19 mypy-extensions==1.1.0 20 20 numpy==2.4.2 21 + openai-harmony==0.0.8 22 + openresponses-types==2.3.0.post1 21 23 packaging==26.0 22 24 pathspec==1.0.4 23 25 platformdirs==4.9.2 24 - protobuf==6.33.5 26 + protobuf==7.34.0 25 27 pydantic==2.12.5 26 28 pydantic-core==2.41.5 27 29 pytokens==0.4.1 28 30 pyyaml==6.0.3 29 - regex==2026.2.19 31 + regex==2026.2.28 30 32 requests==2.32.5 31 33 safetensors==0.7.0 32 34 starlette==0.48.0

+3 -3

server/stack/requirements/app-server/pylock.app-server.meta.json

··· 1 1 { 2 - "lock_input_hash": "sha256:182c606e20dd957344cc3adc54391f47f4b6dd80b4481ddf219392a7aad6e0ce", 2 + "lock_input_hash": "sha256:c836d5cfb697330a57241b2b8f275a804178488ec906b19866809ef33c95ba81", 3 3 "lock_version": 1, 4 - "locked_at": "2026-02-25T13:24:58.188888+00:00", 4 + "locked_at": "2026-03-01T18:20:47.939345+00:00", 5 5 "other_inputs_hash": "sha256:63b3c2cfe2ec414938e81dace7aac779c7b902bae681618cd8827e9f16880985", 6 - "requirements_hash": "sha256:dc0d11b6a0897aff3ae64d3bda37f52b66dd75932f713491457eeea1b68c3fde", 6 + "requirements_hash": "sha256:bc1a0df3c15a1fad6f446be81a50a01835c1a11ff412440842f751ecdce9cbdd", 7 7 "version_inputs_hash": "sha256:58db986b7cd72eeded675f7c9afd8138fe024fb51451131b5562922bbde3cf43" 8 8 }

+118 -76

server/stack/requirements/app-server/pylock.app-server.toml

··· 138 138 139 139 [[packages]] 140 140 name = "filelock" 141 - version = "3.24.3" 141 + version = "3.25.0" 142 142 index = "https://pypi.org/simple" 143 143 144 144 [[packages.wheels]] 145 - url = "https://files.pythonhosted.org/packages/9c/0f/5d0c71a1aefeb08efff26272149e07ab922b64f46c63363756224bd6872e/filelock-3.24.3-py3-none-any.whl" 146 - upload-time = 2026-02-19T00:48:18Z 147 - size = 24331 145 + url = "https://files.pythonhosted.org/packages/f9/0b/de6f54d4a8bedfe8645c41497f3c18d749f0bd3218170c667bf4b81d0cdd/filelock-3.25.0-py3-none-any.whl" 146 + upload-time = 2026-03-01T15:08:44Z 147 + size = 26427 148 148 149 149 [packages.wheels.hashes] 150 - sha256 = "426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d" 150 + sha256 = "5ccf8069f7948f494968fc0713c10e5c182a9c9d9eef3a636307a20c2490f047" 151 151 152 152 [[packages]] 153 153 name = "fsspec" ··· 177 177 178 178 [[packages]] 179 179 name = "hf-xet" 180 - version = "1.3.1" 180 + version = "1.3.2" 181 181 index = "https://pypi.org/simple" 182 182 183 183 [[packages.wheels]] 184 - url = "https://files.pythonhosted.org/packages/d4/de/72acb8d7702b3cf9b36a68e8380f3114bf04f9f21cf9e25317457fe31f00/hf_xet-1.3.1-cp313-cp313t-macosx_11_0_arm64.whl" 185 - upload-time = 2026-02-25T00:57:39Z 186 - size = 3518075 184 + url = "https://files.pythonhosted.org/packages/35/56/987b0537ddaf88e17192ea09afa8eca853e55f39a4721578be436f8409df/hf_xet-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl" 185 + upload-time = 2026-02-27T17:25:47Z 186 + size = 3521565 187 187 188 188 [packages.wheels.hashes] 189 - sha256 = "0810b69c64e96dee849036193848007f665dca2311879c9ea8693f4fc37f1795" 189 + sha256 = "c1ae4d3a716afc774e66922f3cac8206bfa707db13f6a7e62dfff74bfc95c9a8" 190 190 191 191 [[packages.wheels]] 192 - url = "https://files.pythonhosted.org/packages/1d/5c/ed728d8530fec28da88ee882b522fccf00dc98e9d7bae4cdb0493070cb17/hf_xet-1.3.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" 193 - upload-time = 2026-02-25T00:57:32Z 194 - 
size = 4174369 192 + url = "https://files.pythonhosted.org/packages/a8/5c/7e4a33a3d689f77761156cc34558047569e54af92e4d15a8f493229f6767/hf_xet-1.3.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" 193 + upload-time = 2026-02-27T17:25:40Z 194 + size = 4176494 195 195 196 196 [packages.wheels.hashes] 197 - sha256 = "ecd38f98e7f0f41108e30fd4a9a5553ec30cf726df7473dd3e75a1b6d56728c2" 197 + sha256 = "d6dbdf231efac0b9b39adcf12a07f0c030498f9212a18e8c50224d0e84ab803d" 198 198 199 199 [[packages.wheels]] 200 - url = "https://files.pythonhosted.org/packages/df/31/de07e26e396f46d13a09251df69df9444190e93e06a9d30d639e96c8a0ed/hf_xet-1.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl" 201 - upload-time = 2026-02-25T00:57:49Z 202 - size = 4390709 200 + url = "https://files.pythonhosted.org/packages/e2/e1/3af961f71a40e09bf5ee909842127b6b00f5ab4ee3817599dc0771b79893/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl" 201 + upload-time = 2026-02-27T17:25:58Z 202 + size = 4394161 203 203 204 204 [packages.wheels.hashes] 205 - sha256 = "b3012c0f2ce1f0863338491a2bc0fd3f84aded0e147ab25f230da1f5249547fd" 205 + sha256 = "35b855024ca37f2dd113ac1c08993e997fbe167b9d61f9ef66d3d4f84015e508" 206 206 207 207 [[packages.wheels]] 208 - url = "https://files.pythonhosted.org/packages/c0/e5/a2f3eaae09da57deceb16a96ebe9ae1f6f7b9b94145a9cd3c3f994e7782a/hf_xet-1.3.1-cp37-abi3-macosx_11_0_arm64.whl" 209 - upload-time = 2026-02-25T00:57:42Z 210 - size = 3523677 208 + url = "https://files.pythonhosted.org/packages/e4/71/b99aed3823c9d1795e4865cf437d651097356a3f38c7d5877e4ac544b8e4/hf_xet-1.3.2-cp37-abi3-macosx_11_0_arm64.whl" 209 + upload-time = 2026-02-27T17:25:50Z 210 + size = 3526171 211 211 212 212 [packages.wheels.hashes] 213 - sha256 = "329c80c86f2dda776bafd2e4813a46a3ee648dce3ac0c84625902c70d7a6ddba" 213 + sha256 = "a85d3d43743174393afe27835bde0cd146e652b5fcfdbcd624602daef2ef3259" 214 214 215 215 [[packages.wheels]] 216 - url = 
"https://files.pythonhosted.org/packages/61/cd/acbbf9e51f17d8cef2630e61741228e12d4050716619353efc1ac119f902/hf_xet-1.3.1-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" 217 - upload-time = 2026-02-25T00:57:35Z 218 - size = 4178557 216 + url = "https://files.pythonhosted.org/packages/9d/ca/907890ce6ef5598b5920514f255ed0a65f558f820515b18db75a51b2f878/hf_xet-1.3.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" 217 + upload-time = 2026-02-27T17:25:43Z 218 + size = 4180750 219 219 220 220 [packages.wheels.hashes] 221 - sha256 = "2973c3ff594c3a8da890836308cae1444c8af113c6f10fe6824575ddbc37eca7" 221 + sha256 = "7c2a054a97c44e136b1f7f5a78f12b3efffdf2eed3abc6746fc5ea4b39511633" 222 222 223 223 [[packages.wheels]] 224 - url = "https://files.pythonhosted.org/packages/08/9c/b667098a636a88358dbeb2caf90e3cb9e4b961f61f6c55bb312793424def/hf_xet-1.3.1-cp37-abi3-musllinux_1_2_x86_64.whl" 225 - upload-time = 2026-02-25T00:57:52Z 226 - size = 4395743 224 + url = "https://files.pythonhosted.org/packages/00/b3/7bc1ff91d1ac18420b7ad1e169b618b27c00001b96310a89f8a9294fe509/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_x86_64.whl" 225 + upload-time = 2026-02-27T17:26:03Z 226 + size = 4398020 227 227 228 228 [packages.wheels.hashes] 229 - sha256 = "e5063789c9d21f51e9ed4edbee8539655d3486e9cad37e96b7af967da20e8b16" 229 + sha256 = "06cdbde243c85f39a63b28e9034321399c507bcd5e7befdd17ed2ccc06dfe14e" 230 230 231 231 [[packages]] 232 232 name = "huggingface-hub" ··· 426 426 sha256 = "0f01dcf33e73d80bd8dc0f20a71303abbafa26a19e23f6b68d1aa9990af90257" 427 427 428 428 [[packages]] 429 + name = "openai-harmony" 430 + version = "0.0.8" 431 + index = "https://pypi.org/simple" 432 + 433 + [[packages.wheels]] 434 + url = "https://files.pythonhosted.org/packages/45/c6/2502f416d46be3ec08bb66d696cccffb57781a499e3ff2e4d7c174af4e8f/openai_harmony-0.0.8-cp38-abi3-macosx_11_0_arm64.whl" 435 + upload-time = 2025-11-05T19:06:57Z 436 + size = 2627806 437 + 438 + [packages.wheels.hashes] 439 + sha256 = 
"029ec25ca74abe48fdb58eb9fdd2a8c1618581fc33ce8e5653f8a1ffbfbd9326" 440 + 441 + [[packages.wheels]] 442 + url = "https://files.pythonhosted.org/packages/25/3f/1a192b93bb47c6b44cd98ba8cc1d3d2a9308f1bb700c3017e6352da11bda/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" 443 + upload-time = 2025-11-05T19:06:55Z 444 + size = 2953260 445 + 446 + [packages.wheels.hashes] 447 + sha256 = "c007d277218a50db8839e599ed78e0fffe5130f614c3f6d93ae257f282071a29" 448 + 449 + [[packages.wheels]] 450 + url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl" 451 + upload-time = 2025-11-05T19:07:05Z 452 + size = 3205080 453 + 454 + [packages.wheels.hashes] 455 + sha256 = "007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5" 456 + 457 + [[packages]] 458 + name = "openresponses-types" 459 + version = "2.3.0.post1" 460 + index = "https://pypi.org/simple" 461 + 462 + [[packages.wheels]] 463 + url = "https://files.pythonhosted.org/packages/b2/5f/e16dad89ed24f586da5b01b9b206d3adbf21fe1af8e4dc55d5b93158fde6/openresponses_types-2.3.0.post1-py3-none-any.whl" 464 + upload-time = 2026-01-22T20:02:02Z 465 + size = 13847 466 + 467 + [packages.wheels.hashes] 468 + sha256 = "88f6abcef9cad839203abff420dd080978bf6eb33cc06ddc5d78da4ccdba7613" 469 + 470 + [[packages]] 429 471 name = "packaging" 430 472 version = "26.0" 431 473 index = "https://pypi.org/simple" ··· 466 508 467 509 [[packages]] 468 510 name = "protobuf" 469 - version = "6.33.5" 511 + version = "7.34.0" 470 512 index = "https://pypi.org/simple" 471 513 472 514 [[packages.wheels]] 473 - url = "https://files.pythonhosted.org/packages/a2/6b/e48dfc1191bc5b52950246275bf4089773e91cb5ba3592621723cdddca62/protobuf-6.33.5-cp39-abi3-macosx_10_9_universal2.whl" 474 - upload-time = 2026-01-29T21:51:25Z 475 - size = 427766 515 + url = 
"https://files.pythonhosted.org/packages/13/c4/6322ab5c8f279c4c358bc14eb8aefc0550b97222a39f04eb3c1af7a830fa/protobuf-7.34.0-cp310-abi3-macosx_10_9_universal2.whl" 516 + upload-time = 2026-02-27T00:30:14Z 517 + size = 429248 476 518 477 519 [packages.wheels.hashes] 478 - sha256 = "a5cb85982d95d906df1e2210e58f8e4f1e3cdc088e52c921a041f9c9a0386de5" 520 + sha256 = "8e329966799f2c271d5e05e236459fe1cbfdb8755aaa3b0914fa60947ddea408" 479 521 480 522 [[packages.wheels]] 481 - url = "https://files.pythonhosted.org/packages/9b/53/a9443aa3ca9ba8724fdfa02dd1887c1bcd8e89556b715cfbacca6b63dbec/protobuf-6.33.5-cp39-abi3-manylinux2014_x86_64.whl" 482 - upload-time = 2026-01-29T21:51:28Z 483 - size = 323465 523 + url = "https://files.pythonhosted.org/packages/b5/57/89727baef7578897af5ed166735ceb315819f1c184da8c3441271dbcfde7/protobuf-7.34.0-cp310-abi3-manylinux2014_x86_64.whl" 524 + upload-time = 2026-02-27T00:30:20Z 525 + size = 324268 484 526 485 527 [packages.wheels.hashes] 486 - sha256 = "cbf16ba3350fb7b889fca858fb215967792dc125b35c7976ca4818bee3521cf0" 528 + sha256 = "964cf977e07f479c0697964e83deda72bcbc75c3badab506fb061b352d991b01" 487 529 488 530 [[packages.wheels]] 489 - url = "https://files.pythonhosted.org/packages/57/bf/2086963c69bdac3d7cff1cc7ff79b8ce5ea0bec6797a017e1be338a46248/protobuf-6.33.5-py3-none-any.whl" 490 - upload-time = 2026-01-29T21:51:32Z 491 - size = 170687 531 + url = "https://files.pythonhosted.org/packages/a4/e7/14dc9366696dcb53a413449881743426ed289d687bcf3d5aee4726c32ebb/protobuf-7.34.0-py3-none-any.whl" 532 + upload-time = 2026-02-27T00:30:23Z 533 + size = 170716 492 534 493 535 [packages.wheels.hashes] 494 - sha256 = "69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02" 536 + sha256 = "e3b914dd77fa33fa06ab2baa97937746ab25695f389869afdf03e81f34e45dc7" 495 537 496 538 [[packages]] 497 539 name = "pydantic" ··· 603 645 604 646 [[packages]] 605 647 name = "regex" 606 - version = "2026.2.19" 648 + version = "2026.2.28" 607 649 index = 
"https://pypi.org/simple" 608 650 609 651 [[packages.wheels]] 610 - url = "https://files.pythonhosted.org/packages/d2/2d/a849835e76ac88fcf9e8784e642d3ea635d183c4112150ca91499d6703af/regex-2026.2.19-cp313-cp313-macosx_10_13_universal2.whl" 611 - upload-time = 2026-02-19T19:01:23Z 612 - size = 489329 652 + url = "https://files.pythonhosted.org/packages/87/f6/dc9ef48c61b79c8201585bf37fa70cd781977da86e466cd94e8e95d2443b/regex-2026.2.28-cp313-cp313-macosx_10_13_universal2.whl" 653 + upload-time = 2026-02-28T02:17:22Z 654 + size = 489311 613 655 614 656 [packages.wheels.hashes] 615 - sha256 = "8df08decd339e8b3f6a2eb5c05c687fe9d963ae91f352bc57beb05f5b2ac6879" 657 + sha256 = "6d63a07e5ec8ce7184452cb00c41c37b49e67dc4f73b2955b5b8e782ea970784" 616 658 617 659 [[packages.wheels]] 618 - url = "https://files.pythonhosted.org/packages/cd/58/714384efcc07ae6beba528a541f6e99188c5cc1bc0295337f4e8a868296d/regex-2026.2.19-cp313-cp313-macosx_11_0_arm64.whl" 619 - upload-time = 2026-02-19T19:01:27Z 620 - size = 289033 660 + url = "https://files.pythonhosted.org/packages/d2/a6/ba1068a631ebd71a230e7d8013fcd284b7c89c35f46f34a7da02082141b1/regex-2026.2.28-cp313-cp313-macosx_11_0_arm64.whl" 661 + upload-time = 2026-02-28T02:17:26Z 662 + size = 289051 621 663 622 664 [packages.wheels.hashes] 623 - sha256 = "c13228fbecb03eadbfd8f521732c5fda09ef761af02e920a3148e18ad0e09968" 665 + sha256 = "de0cf053139f96219ccfabb4a8dd2d217c8c82cb206c91d9f109f3f552d6b43d" 624 666 625 667 [[packages.wheels]] 626 - url = "https://files.pythonhosted.org/packages/8b/d9/e5dbef95008d84e9af1dc0faabbc34a7fbc8daa05bc5807c5cf86c2bec49/regex-2026.2.19-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" 627 - upload-time = 2026-02-19T19:01:34Z 628 - size = 803718 668 + url = "https://files.pythonhosted.org/packages/12/2f/049901def913954e640d199bbc6a7ca2902b6aeda0e5da9d17f114100ec2/regex-2026.2.28-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" 669 + upload-time 
= 2026-02-28T02:17:35Z 670 + size = 802101 629 671 630 672 [packages.wheels.hashes] 631 - sha256 = "9cbc69eae834afbf634f7c902fc72ff3e993f1c699156dd1af1adab5d06b7fe7" 673 + sha256 = "e61eea47230eba62a31f3e8a0e3164d0f37ef9f40529fb2c79361bc6b53d2a92" 632 674 633 675 [[packages.wheels]] 634 - url = "https://files.pythonhosted.org/packages/c3/45/ef68d805294b01ec030cfd388724ba76a5a21a67f32af05b17924520cb0b/regex-2026.2.19-cp313-cp313-musllinux_1_2_x86_64.whl" 635 - upload-time = 2026-02-19T19:01:47Z 636 - size = 790026 676 + url = "https://files.pythonhosted.org/packages/05/7a/51cfbad5758f8edae430cb21961a9c8d04bce1dae4d2d18d4186eec7cfa1/regex-2026.2.28-cp313-cp313-musllinux_1_2_x86_64.whl" 677 + upload-time = 2026-02-28T02:17:49Z 678 + size = 790152 637 679 638 680 [packages.wheels.hashes] 639 - sha256 = "790dbf87b0361606cb0d79b393c3e8f4436a14ee56568a7463014565d97da02a" 681 + sha256 = "9185cc63359862a6e80fe97f696e04b0ad9a11c4ac0a4a927f979f611bfe3768" 640 682 641 683 [[packages.wheels]] 642 - url = "https://files.pythonhosted.org/packages/a9/a2/e0b4575b93bc84db3b1fab24183e008691cd2db5c0ef14ed52681fbd94dd/regex-2026.2.19-cp313-cp313t-macosx_10_13_universal2.whl" 643 - upload-time = 2026-02-19T19:01:54Z 644 - size = 492202 684 + url = "https://files.pythonhosted.org/packages/24/07/6c7e4cec1e585959e96cbc24299d97e4437a81173217af54f1804994e911/regex-2026.2.28-cp313-cp313t-macosx_10_13_universal2.whl" 685 + upload-time = 2026-02-28T02:17:56Z 686 + size = 492541 645 687 646 688 [packages.wheels.hashes] 647 - sha256 = "93d881cab5afdc41a005dba1524a40947d6f7a525057aa64aaf16065cf62faa9" 689 + sha256 = "97054c55db06ab020342cc0d35d6f62a465fa7662871190175f1ad6c655c028f" 648 690 649 691 [[packages.wheels]] 650 - url = "https://files.pythonhosted.org/packages/70/0c/fe89966dfae43da46f475362401f03e4d7dc3a3c955b54f632abc52669e0/regex-2026.2.19-cp313-cp313t-macosx_11_0_arm64.whl" 651 - upload-time = 2026-02-19T19:01:59Z 652 - size = 291236 692 + url = 
"https://files.pythonhosted.org/packages/5b/11/c301f8cb29ce9644a5ef85104c59244e6e7e90994a0f458da4d39baa8e17/regex-2026.2.28-cp313-cp313t-macosx_11_0_arm64.whl" 693 + upload-time = 2026-02-28T02:18:00Z 694 + size = 291509 653 695 654 696 [packages.wheels.hashes] 655 - sha256 = "d793c5b4d2b4c668524cd1651404cfc798d40694c759aec997e196fe9729ec60" 697 + sha256 = "d6cfe798d8da41bb1862ed6e0cba14003d387c3c0c4a5d45591076ae9f0ce2f8" 656 698 657 699 [[packages.wheels]] 658 - url = "https://files.pythonhosted.org/packages/90/7c/981ea0694116793001496aaf9524e5c99e122ec3952d9e7f1878af3a6bf1/regex-2026.2.19-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" 659 - upload-time = 2026-02-19T19:02:08Z 660 - size = 812922 700 + url = "https://files.pythonhosted.org/packages/55/c2/fd429066da487ef555a9da73bf214894aec77fc8c66a261ee355a69871a8/regex-2026.2.28-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" 701 + upload-time = 2026-02-28T02:18:08Z 702 + size = 812044 661 703 662 704 [packages.wheels.hashes] 663 - sha256 = "1e7a08622f7d51d7a068f7e4052a38739c412a3e74f55817073d2e2418149619" 705 + sha256 = "5cdcc17d935c8f9d3f4db5c2ebe2640c332e3822ad5d23c2f8e0228e6947943a" 664 706 665 707 [[packages.wheels]] 666 - url = "https://files.pythonhosted.org/packages/59/78/9ef4356bd4aed752775bd18071034979b85f035fec51f3a4f9dea497a254/regex-2026.2.19-cp313-cp313t-musllinux_1_2_x86_64.whl" 667 - upload-time = 2026-02-19T19:02:20Z 668 - size = 799636 708 + url = "https://files.pythonhosted.org/packages/0a/50/414ba0731c4bd40b011fa4703b2cc86879ec060c64f2a906e65a56452589/regex-2026.2.28-cp313-cp313t-musllinux_1_2_x86_64.whl" 709 + upload-time = 2026-02-28T02:18:23Z 710 + size = 800184 669 711 670 712 [packages.wheels.hashes] 671 - sha256 = "c227f2922153ee42bbeb355fd6d009f8c81d9d7bdd666e2276ce41f53ed9a743" 713 + sha256 = "aaffaecffcd2479ce87aa1e74076c221700b7c804e48e98e62500ee748f0f550" 672 714 673 715 [[packages]] 674 716 name = "requests"

+2

server/stack/requirements/app-server/requirements-app-server.in

··· 5 5 mlx-lm==0.28.3 6 6 black==25.9.0 7 7 huggingface-hub==0.35.0 8 + openai-harmony==0.0.8 9 + openresponses-types

+2 -1

server/stack/venvstacks.toml

··· 27 27 "mlx-lm==0.28.3", 28 28 "black==25.9.0", 29 29 "huggingface-hub==0.35.0", 30 - 30 + "openai-harmony==0.0.8", 31 + "openresponses-types" 31 32 ] 32 33 33 34 [tool.uv]

+39

server/uv.lock

··· 306 306 ] 307 307 308 308 [[package]] 309 + name = "openai-harmony" 310 + version = "0.0.8" 311 + source = { registry = "https://pypi.org/simple" } 312 + dependencies = [ 313 + { name = "pydantic" }, 314 + ] 315 + sdist = { url = "https://files.pythonhosted.org/packages/3e/92/2d038d096f29179c7c9571b431f9e739f87a487121901725e23fe338dd9d/openai_harmony-0.0.8.tar.gz", hash = "sha256:6e43f98e6c242fa2de6f8ea12eab24af63fa2ed3e89c06341fb9d92632c5cbdf", size = 284777, upload-time = "2025-11-05T19:07:06.727Z" } 316 + wheels = [ 317 + { url = "https://files.pythonhosted.org/packages/45/c6/2502f416d46be3ec08bb66d696cccffb57781a499e3ff2e4d7c174af4e8f/openai_harmony-0.0.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:029ec25ca74abe48fdb58eb9fdd2a8c1618581fc33ce8e5653f8a1ffbfbd9326", size = 2627806, upload-time = "2025-11-05T19:06:57.063Z" }, 318 + { url = "https://files.pythonhosted.org/packages/d3/d2/ce6953ca87db9cae3e775024184da7d1c5cb88cead19a2d75b42f00a959c/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4f709815924ec325b9a890e6ab2bbb0ceec8e319a4e257328eb752cf36b2efc", size = 2948463, upload-time = "2025-11-05T19:06:48.17Z" }, 319 + { url = "https://files.pythonhosted.org/packages/fa/4c/b553c9651662d6ce102ca7f3629d268b23df1abe5841e24bed81e8a8e949/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5cfcfd963b50a41fc656c84d3440ca6eecdccd6c552158ce790b8f2e33dfb5a9", size = 2704083, upload-time = "2025-11-05T19:06:50.205Z" }, 320 + { url = "https://files.pythonhosted.org/packages/9b/af/4eec8f9ab9c27bcdb444460c72cf43011d176fc44c79d6e113094ca1e152/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a3a16972aa1cee38ea958470cd04ac9a2d5ac38fdcf77ab686611246220c158", size = 2959765, upload-time = "2025-11-05T19:06:53.62Z" }, 321 + { url = 
"https://files.pythonhosted.org/packages/11/3c/33f3374e4624e0e776f6b13b73c45a7ead7f9c4529f8369ed5bfcaa30cac/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b4d5cfa168e74d08f8ba6d58a7e49bc7daef4d58951ec69b66b0d56f4927a68d", size = 3427031, upload-time = "2025-11-05T19:06:51.829Z" }, 322 + { url = "https://files.pythonhosted.org/packages/25/3f/1a192b93bb47c6b44cd98ba8cc1d3d2a9308f1bb700c3017e6352da11bda/openai_harmony-0.0.8-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c007d277218a50db8839e599ed78e0fffe5130f614c3f6d93ae257f282071a29", size = 2953260, upload-time = "2025-11-05T19:06:55.406Z" }, 323 + { url = "https://files.pythonhosted.org/packages/5b/f8/93b582cad3531797c3db7c2db5400fd841538ccddfd9f5e3df61be99a630/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8565d4f5a0638da1bffde29832ed63c9e695c558611053add3b2dc0b56c92dbc", size = 3127044, upload-time = "2025-11-05T19:06:59.553Z" }, 324 + { url = "https://files.pythonhosted.org/packages/1d/10/4327dbf87f75ae813405fd9a9b4a5cde63d506ffed0a096a440a4cabd89c/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:cbaa3bda75ef0d8836e1f8cc84af62f971b1d756d740efc95c38c3e04c0bfde2", size = 2932931, upload-time = "2025-11-05T19:07:01.437Z" }, 325 + { url = "https://files.pythonhosted.org/packages/8a/c8/1774eec4f6f360ef57618fb8f52e3d3af245b2491bd0297513aa09eec04b/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:772922a9bd24e133950fad71eb1550836f415a88e8c77870e12d0c3bd688ddc2", size = 2996140, upload-time = "2025-11-05T19:07:03.438Z" }, 326 + { url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5", size = 3205080, upload-time = "2025-11-05T19:07:05.078Z" }, 327 + { url = 
"https://files.pythonhosted.org/packages/14/63/119de431572d7c70a7bf1037034a9be6ed0a7502a7498ba7302bca5b3242/openai_harmony-0.0.8-cp38-abi3-win32.whl", hash = "sha256:a9b5f893326b28d9e935ade14b4f655f5a840942473bc89b201c25f7a15af9cf", size = 2082457, upload-time = "2025-11-05T19:07:09.631Z" }, 328 + { url = "https://files.pythonhosted.org/packages/40/1f/c83cf5a206c263ee70448a5ae4264682555f4d0b5bed0d2cc6ca1108103d/openai_harmony-0.0.8-cp38-abi3-win_amd64.whl", hash = "sha256:39d44f0d8f466bd56698e7ead708bead3141e27b9b87e3ab7d5a6d0e4a869ee5", size = 2438369, upload-time = "2025-11-05T19:07:08.1Z" }, 329 + ] 330 + 331 + [[package]] 332 + name = "openresponses-types" 333 + version = "2.3.0.post1" 334 + source = { registry = "https://pypi.org/simple" } 335 + dependencies = [ 336 + { name = "pydantic" }, 337 + ] 338 + sdist = { url = "https://files.pythonhosted.org/packages/d9/26/b612c3215f5599714fa94d63eb5ee59b4eb66dbdeeaf86bb4d848359484d/openresponses_types-2.3.0.post1.tar.gz", hash = "sha256:11b8896d3621d2ac2439f6ff106f34ddcb1bbd517c317a6c852a9df2e98a0753", size = 19254, upload-time = "2026-01-22T20:02:03.933Z" } 339 + wheels = [ 340 + { url = "https://files.pythonhosted.org/packages/b2/5f/e16dad89ed24f586da5b01b9b206d3adbf21fe1af8e4dc55d5b93158fde6/openresponses_types-2.3.0.post1-py3-none-any.whl", hash = "sha256:88f6abcef9cad839203abff420dd080978bf6eb33cc06ddc5d78da4ccdba7613", size = 13847, upload-time = "2026-01-22T20:02:02.582Z" }, 341 + ] 342 + 343 + [[package]] 309 344 name = "packaging" 310 345 version = "25.0" 311 346 source = { registry = "https://pypi.org/simple" } ··· 501 536 { name = "fastapi" }, 502 537 { name = "huggingface-hub" }, 503 538 { name = "mlx-lm" }, 539 + { name = "openai-harmony" }, 540 + { name = "openresponses-types" }, 504 541 { name = "uvicorn" }, 505 542 ] 506 543 ··· 510 547 { name = "fastapi" }, 511 548 { name = "huggingface-hub", specifier = ">=0.34.0" }, 512 549 { name = "mlx-lm" }, 550 + { name = "openai-harmony", specifier = 
"==0.0.8" }, 551 + { name = "openresponses-types" }, 513 552 { name = "uvicorn" }, 514 553 ] 515 554

+1 -1

tiles/Cargo.toml

··· 1 1 [package] 2 2 name = "tiles" 3 - version = "0.4.1" 3 + version = "0.4.2" 4 4 edition = "2024" 5 5 6 6 [dependencies]

+8 -12

tiles/src/commands/mod.rs

··· 20 20 21 21 use crate::{AccountArgs, AccountCommands}; 22 22 23 - const FTUE_VERSION_TITLE: &str = "Tiles v0.4.1"; 23 + const FTUE_VERSION_TITLE: &str = "Tiles"; 24 24 const FTUE_HEADER: &str = "Initializing local account..."; 25 25 const FTUE_ASCII_ART: &str = r#" 26 26 ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ ··· 41 41 ▓▓▓▓▓▓▓▓ 42 42 "#; 43 43 const FTUE_REASSURANCE_LOCAL: &str = "On-device by default."; 44 - const FTUE_REASSURANCE_NO_CLOUD: &str = "Online models and identity optional."; 44 + // const FTUE_REASSURANCE_NO_CLOUD: &str = "Online models and identity optional."; 45 45 const FTUE_NICKNAME_PROMPT: &str = "Choose a username:"; 46 46 const FTUE_NICKNAME_REQUIRED: &str = "Username is required. Please enter a username:"; 47 47 const FTUE_ACCOUNT_CREATED: &str = "✓ Account created"; ··· 52 52 const FTUE_DATA_DIR_CHANGE_HINT: &str = "Change data path later:"; 53 53 const FTUE_DATA_DIR_CHANGE_COMMAND: &str = "tiles data set-path <PATH>"; 54 54 const FTUE_CUSTOM_DATA_PROMPT: &str = "Use a custom data directory now? 
[y/N]"; 55 + const FTUE_UPDATE_COMMAND: &str = "tiles update"; 55 56 56 57 pub fn run_setup_for_ftue(run_args: &RunArgs) -> Result<()> { 57 58 // initializes config directory ··· 62 63 let root_config = get_or_create_config()?; 63 64 let root_user_details = get_root_user_details(&root_config)?; 64 65 println!("{}", FTUE_ASCII_ART.blue()); 65 - println!("{}", FTUE_VERSION_TITLE); 66 + println!("{} {}", FTUE_VERSION_TITLE, env!("CARGO_PKG_VERSION")); 66 67 println!(); 67 68 68 69 if root_user_details.id.is_empty() { 69 70 println!("{}", FTUE_HEADER); 70 71 println!(); 71 72 println!("{}", FTUE_REASSURANCE_LOCAL); 72 - println!("{}", FTUE_REASSURANCE_NO_CLOUD); 73 73 println!(); 74 74 // FTUE 75 75 setup_root_account(root_config.clone())?; ··· 236 236 ); 237 237 238 238 println!("{}", update_str.yellow()); 239 - println!("You can always update via `tiles update` later\n"); 239 + println!("You can always update Tiles later via:"); 240 + println!(" {}\n", FTUE_UPDATE_COMMAND.bright_blue().bold()); 241 + println!("{}", "Do you want to update now? (Y/n)".to_string().green()); 240 242 241 - println!("{}", "Do you want to update now? (Y/N)".to_string().green()); 242 243 let stdin = io::stdin(); 243 244 let mut input = String::new(); 244 245 stdin.read_line(&mut input)?; ··· 341 342 342 343 #[test] 343 344 fn ftue_copy_matches_expected_constants() { 344 - assert_eq!(FTUE_VERSION_TITLE, "Tiles v0.4.1"); 345 345 assert_eq!(FTUE_HEADER, "Initializing local account..."); 346 346 assert_eq!(FTUE_REASSURANCE_LOCAL, "On-device by default."); 347 - assert_eq!( 348 - FTUE_REASSURANCE_NO_CLOUD, 349 - "Online models and identity optional." 
350 - ); 351 347 assert_eq!(FTUE_NICKNAME_PROMPT, "Choose a username:"); 352 348 assert_eq!(FTUE_ACCOUNT_LABEL, "Account"); 353 349 assert_eq!(FTUE_ACCOUNT_DETAILS_HINT, "View full details:"); ··· 355 351 assert_eq!(FTUE_DATA_DIR_CHANGE_HINT, "Change data path later:"); 356 352 assert_eq!( 357 353 FTUE_CUSTOM_DATA_PROMPT, 358 - "Use a custom data directory now? [y/N]" 354 + "Use a custom data directory now? [Y/N]" 359 355 ); 360 356 } 361 357

+1 -3

tiles/src/main.rs

··· 132 132 }; 133 133 commands::run_setup_for_ftue(&run_args) 134 134 .inspect_err(|e| eprintln!("Failed to setup Tiles due to {:?}", e))?; 135 - commands::try_app_update() 136 - .await 137 - .inspect_err(|e| eprintln!("Failed to update the app due to {:?}", e))?; 135 + let _ = commands::try_app_update().await; 138 136 commands::run(&runtime, run_args).await; 139 137 } 140 138 Some(Commands::Run {

+67 -21

tiles/src/runtime/mlx.rs

··· 1 1 use crate::runtime::RunArgs; 2 2 use crate::utils::config::{ConfigProvider, DefaultProvider, get_memory_path}; 3 3 use crate::utils::hf_model_downloader::*; 4 - use anyhow::{Context, Result}; 4 + use anyhow::{Context, Result, anyhow}; 5 5 use futures_util::StreamExt; 6 6 use owo_colors::OwoColorize; 7 7 use reqwest::{Client, StatusCode}; ··· 47 47 // think: String, 48 48 reply: String, 49 49 code: String, 50 + prev_response_id: String, 50 51 metrics: Option<BenchmarkMetrics>, 51 52 } 52 53 ··· 245 246 let mut editor = Editor::<TilesHinter, DefaultHistory>::with_config(config).unwrap(); 246 247 editor.set_helper(Some(TilesHinter)); 247 248 let mut g_reply: String = "".to_owned(); 249 + let mut prev_response_id: String = String::from(""); 250 + 248 251 loop { 249 252 let readline = editor.readline(">>> "); 250 253 let input = match readline { ··· 292 295 &python_code, 293 296 &g_reply, 294 297 run_args, 298 + &prev_response_id, 295 299 ) 296 300 .await 297 301 { ··· 308 312 if run_args.memory { 309 313 println!("\n{}", response.reply.trim()); 310 314 } else { 315 + prev_response_id = response.prev_response_id; 311 316 println!("\n"); 312 317 } 313 318 // Display benchmark metrics if available ··· 345 350 } 346 351 } 347 352 348 - pub async fn ping() -> Result<(), String> { 353 + pub async fn ping() -> Result<()> { 349 354 let client = Client::new(); 350 355 let res = client.get("http://127.0.0.1:6969/ping").send().await; 351 356 352 357 match res { 353 - Err(_) => Err(String::from("Server is down")), 358 + Err(err) => Err(anyhow!("Server down due to {:?}", err)), 354 359 _ => Ok(()), 355 360 } 356 361 } ··· 365 370 let body = json!({ 366 371 "model": model_name, 367 372 "memory_path": memory_path, 368 - "system_prompt": modelfile.system.clone().unwrap_or(default_modelfile.system.clone().unwrap()) 373 + "system_prompt": modelfile.system.clone().unwrap_or(default_modelfile.system.clone().unwrap_or("".to_owned())) 369 374 }); 370 375 371 376 let res = client ··· 
399 404 python_code: &str, 400 405 g_reply: &str, 401 406 run_args: &RunArgs, 402 - ) -> Result<ChatResponse, String> { 407 + prev_response_id: &str, 408 + ) -> Result<ChatResponse> { 403 409 let client = Client::new(); 404 - 405 410 let body = json!({ 406 411 "model": model_name, 412 + "input": [{ 413 + "type": "message", 414 + "role": "user", 415 + "content": input 416 + }, 417 + { 418 + "type": "message", 419 + "role": "developer", 420 + "content": "" 421 + }], 422 + "reasoning": {"effort": "medium"}, 423 + "chat_start": chat_start, 424 + "stream": true, 425 + "previous_response_id": prev_response_id, 426 + "python_code": python_code, 427 + "messages": [{"role": "assistant", "content": g_reply}, {"role": "user", "content": input}] 428 + }); 429 + 430 + let memory_body = json!({ 431 + "model": model_name, 407 432 "input": input, 408 433 "chat_start": chat_start, 409 434 "stream": true, 410 435 "python_code": python_code, 411 436 "messages": [{"role": "assistant", "content": g_reply}, {"role": "user", "content": input}] 437 + 412 438 }); 413 - let api_url = if run_args.memory { 414 - "http://127.0.0.1:6969/v1/chat/completions" 439 + let res = if run_args.memory { 440 + let api_url = "http://127.0.0.1:6969/v1/chat/completions"; 441 + client.post(api_url).json(&memory_body).send().await? 415 442 } else { 416 - "http://127.0.0.1:6969/v1/responses" 443 + let api_url = "http://127.0.0.1:6969/v1/responses"; 444 + client.post(api_url).json(&body).send().await? 
417 445 }; 418 - let res = client.post(api_url).json(&body).send().await.unwrap(); 419 446 420 447 let mut stream = res.bytes_stream(); 421 448 let mut accumulated = String::new(); 422 449 println!(); 423 450 let mut metrics: Option<BenchmarkMetrics> = None; 424 451 let mut is_answer_start = false; 452 + let mut prev_response_id: String = String::from(""); 453 + let mut output_completed: bool = false; 425 454 while let Some(chunk) = stream.next().await { 426 - let chunk = chunk.unwrap(); 455 + let chunk = chunk?; 427 456 let s = String::from_utf8_lossy(&chunk); 428 457 for line in s.lines() { 429 458 if !line.starts_with("data: ") { ··· 436 465 return Ok(convert_to_chat_response( 437 466 &accumulated, 438 467 run_args.memory, 468 + prev_response_id, 439 469 metrics, 440 470 )); 441 471 } 442 472 443 - // Parse JSON 473 + //TODO: This will break if we ask the model to give an essay and all 444 474 let v: Value = serde_json::from_str(data).unwrap(); 445 475 // Check for metrics in the response 446 476 if let Some(metrics_obj) = v.get("metrics") { ··· 449 479 let model_text: Option<&str> = if run_args.memory { 450 480 v["choices"][0]["delta"]["content"].as_str() 451 481 } else { 482 + prev_response_id = serde_json::to_string(&v["id"])?; 483 + // println!("prev_id {}", prev_response_id); 484 + if serde_json::to_string(&v["status"])?.contains("completed") { 485 + output_completed = true; 486 + } 487 + 452 488 v["output"][0]["content"][0]["text"].as_str() 453 489 }; 454 490 455 491 if let Some(delta) = model_text { 456 - accumulated.push_str(delta); 457 - if !run_args.memory && delta.contains("**[Answer]**") { 458 - is_answer_start = true; 459 - } 460 - if !is_answer_start { 461 - print!("{}", delta.dimmed()); 492 + if !run_args.memory { 493 + // TODO: This doesn't support non-harmonic models, so need to handle it 494 + if delta.contains("**[Answer]**") { 495 + is_answer_start = true 496 + } 497 + if !output_completed { 498 + accumulated.push_str(delta); 499 + if 
!is_answer_start { 500 + print!("{}", delta.dimmed()); 501 + } else { 502 + print!("{}", delta); 503 + }; 504 + } 462 505 } else { 463 - print!("{}", delta); 506 + accumulated.push_str(delta); 464 507 } 465 508 use std::io::Write; 466 509 std::io::stdout().flush().ok(); 467 510 } 468 511 } 469 512 } 470 - Err(String::from("request failed")) 513 + 514 + Err(anyhow!("Result failed")) 471 515 } 472 516 473 517 fn convert_to_chat_response( 474 518 content: &str, 475 519 memory_mode: bool, 520 + prev_response_id: String, 476 521 metrics: Option<BenchmarkMetrics>, 477 522 ) -> ChatResponse { 478 523 ChatResponse { 479 524 reply: extract_reply(content, memory_mode), 480 525 code: extract_python(content), 526 + prev_response_id, 481 527 metrics, 482 528 } 483 529 } ··· 511 557 Ok(()) => { 512 558 break; 513 559 } 514 - Err(_) => { 560 + Err(_err) => { 515 561 println!("tiling..."); 516 562 sleep(Duration::from_secs(5)).await; 517 563 }

+5 -6

tiles/src/utils/installer.rs

··· 11 11 12 12 use anyhow::{Result, anyhow}; 13 13 use reqwest::{Client, header::HeaderMap}; 14 - use semver::{Version, VersionReq}; 14 + use semver::Version; 15 15 use serde::Deserialize; 16 16 17 17 const RELEASES_BASE_ENDPOINT: &str = "https://api.github.com"; ··· 64 64 65 65 pub async fn get_update_info() -> Result<UpdateInfo> { 66 66 let latest_vsn = get_latest_version(RELEASES_BASE_ENDPOINT).await?; 67 - 68 - let req_vsn = VersionReq::parse(&latest_vsn)?; 67 + let req_vsn = Version::parse(&latest_vsn)?; 69 68 let current_vsn = Version::parse(env!("CARGO_PKG_VERSION")) 70 69 .map_err(|e| anyhow!("Failed to parse pkg version due to {}", e))?; 71 70 72 - if req_vsn.matches(&current_vsn) { 71 + if req_vsn.cmp_precedence(&current_vsn).is_gt() { 73 72 Ok(UpdateInfo { 74 - can_update: false, 73 + can_update: true, 75 74 latest_version: req_vsn.to_string(), 76 75 current_version: current_vsn.to_string(), 77 76 }) 78 77 } else { 79 78 Ok(UpdateInfo { 80 - can_update: true, 79 + can_update: false, 81 80 latest_version: req_vsn.to_string(), 82 81 current_version: current_vsn.to_string(), 83 82 })

Configure Feed

Configure Feed