Merge pull request #76 from tilesprivacy/feat/prepack-modelfiles

+1 -1

Cargo.lock

··· 3297 3297 3298 3298 [[package]] 3299 3299 name = "tilekit" 3300 - version = "0.1.0" 3300 + version = "0.2.0" 3301 3301 dependencies = [ 3302 3302 "anyhow", 3303 3303 "bon",

+40

modelfiles/gpt-oss

··· 1 + FROM mlx-community/gpt-oss-20b-MXFP4-Q4 2 + SYSTEM """ 3 + You are Tiles, a helpful AI assistant. You have access to a secure Python sandbox for running code and managing your memory. 4 + 5 + ## CRITICAL: Output Format 6 + Your output must be structured into three distinct channels using these exact markers: 7 + 8 + 1. **Analysis Channel**: Thinking and planning. 9 + - Start: `<|channel|>analysis<|message|>` 10 + - End: `<|end|>` 11 + 12 + 2. **Code Channel**: Python code to execute. 13 + - Start: `<|channel|>code<|message|>` 14 + - End: `<|end|>` 15 + 16 + 3. **Final Response Channel**: Your final answer to the user. 17 + - Start: `<|channel|>final<|message|>` 18 + - End: `<|end|>` 19 + 20 + **Rules**: 21 + - ALWAYS start with the Analysis channel. 22 + - If you need to run code, use the Code channel. 23 + - If no code is needed, use the Final Response channel after Analysis. 24 + - **CRITICAL: ALWAYS assign function results and calculations to variables.** 25 + ```python 26 + # CORRECT 27 + result = math.sqrt(12345) 28 + # WRONG - The result will be LOST 29 + math.sqrt(12345) 30 + ``` 31 + - NEVER mention "ChatGPT" or "OpenAI". You are Tiles. 32 + - NEVER use legacy tags like `<think>`, `<python>`, or `<reply>`. Use ONLY the channel markers above. 33 + 34 + ### Handling Results 35 + When you receive a `<result>` block, it indicates the outcome of your code. 36 + - Analyze the result in the **Analysis** channel. 37 + - If the calculation is complete, provide the final answer in the **Final Response** channel immediately. 38 + - **DO NOT** repeat the code once you have the results unless you need to fix a specific error. 39 + - **DO NOT** ask the user if you should run code; just run it if needed using the Code channel. 40 + """

+3

scripts/bundler.sh

··· 3 3 4 4 BINARY_NAME="tiles" 5 5 DIST_DIR="dist" 6 + MODELFILE_DIR="modelfiles" 6 7 SERVER_DIR="server" 7 8 TARGET="release" 8 9 ··· 35 36 rm -rf "${DIST_DIR}/tmp/server/__pycache__" 36 37 rm -rf "${DIST_DIR}/tmp/server/.venv" 37 38 rm -rf "${DIST_DIR}/tmp/server/stack" 39 + 40 + cp -r "${MODELFILE_DIR}" "${DIST_DIR}/tmp/" 38 41 39 42 echo "📦 Creating ${OUT_NAME}.tar.gz..." 40 43 tar --exclude-from=scripts/tar.exclude -czf "${DIST_DIR}/${OUT_NAME}.tar.gz" -C "${DIST_DIR}/tmp" .

+9 -1

scripts/install.sh

··· 6 6 # VERSION="${TILES_VERSION:-latest}" 7 7 VERSION="0.4.0-rc.1" 8 8 INSTALL_DIR="$HOME/.local/bin" # CLI install location 9 - SERVER_DIR="$HOME/.local/share/tiles/server" # Python server folder 9 + SERVER_DIR="$HOME/.local/lib/tiles/server" # Python server folder 10 + MODELFILE_DIR="$HOME/.local/lib/tiles/modelfiles" # Bundled modelfiles folder 10 11 TMPDIR="$(mktemp -d)" 11 12 OS=$(uname -s | tr '[:upper:]' '[:lower:]') 12 13 ARCH=$(uname -m) ··· 33 34 log "📦 Installing tiles binary to ${INSTALL_DIR}..." 34 35 mkdir -p "${INSTALL_DIR}" 35 36 install -m 755 "${TMPDIR}/tiles" "${INSTALL_DIR}/tiles" 37 + 38 + log "Unpacking libs ..." 39 + rm -rf "${MODELFILE_DIR}" 40 + 41 + mkdir -p "${MODELFILE_DIR}" 42 + 43 + cp -r "${TMPDIR}/modelfiles"/* "${MODELFILE_DIR}/" 36 44 37 45 log "📦 Installing Python server to ${SERVER_DIR}..." 38 46 rm -rf "${SERVER_DIR}"

+1 -2

server/api.py

··· 1 1 from fastapi import FastAPI, HTTPException 2 2 3 3 from .schemas import ChatMessage, ChatCompletionRequest, StartRequest, downloadRequest 4 - from .config import SYSTEM_PROMPT 5 4 import logging 6 5 import sys 7 6 from typing import Optional ··· 45 44 """Load the model and start the agent""" 46 45 global _messages, _runner, _memory_path 47 46 48 - _messages = [ChatMessage(role="system", content=SYSTEM_PROMPT)] 47 + _messages = [ChatMessage(role="system", content=request.system_prompt)] 49 48 _memory_path = request.memory_path 50 49 logger.info(f"{runtime.backend}") 51 50 runtime.backend.get_or_load_model(request.model)

-4

server/config.py

··· 3 3 PORT = 6969 4 4 MODEL_ID = "driaforall/mem-agent" 5 5 6 - prompt_path = Path(__file__).parent / "system_prompt.txt" 7 6 MEMORY_PATH = os.path.expanduser("~") + "/tiles_memory" 8 - 9 - with open(prompt_path, "r", encoding="utf-8") as f: 10 - SYSTEM_PROMPT = f.read().strip()

+1 -1

server/schemas.py

··· 59 59 class StartRequest(BaseModel): 60 60 model: str 61 61 memory_path: str 62 - 62 + system_prompt: str 63 63 64 64 class downloadRequest(BaseModel): 65 65 model: str

+1 -1

server/stack/requirements/app-server/packages-app-server.txt

··· 18 18 mlx-lm==0.28.3 19 19 mypy-extensions==1.1.0 20 20 numpy==2.4.1 21 - packaging==25.0 21 + packaging==26.0 22 22 pathspec==1.0.3 23 23 platformdirs==4.5.1 24 24 protobuf==6.33.4

+3 -3

server/stack/requirements/app-server/pylock.app-server.meta.json

··· 1 1 { 2 2 "lock_input_hash": "sha256:182c606e20dd957344cc3adc54391f47f4b6dd80b4481ddf219392a7aad6e0ce", 3 3 "lock_version": 1, 4 - "locked_at": "2026-01-21T09:13:58.607286+00:00", 4 + "locked_at": "2026-01-22T05:41:48.443112+00:00", 5 5 "other_inputs_hash": "sha256:63b3c2cfe2ec414938e81dace7aac779c7b902bae681618cd8827e9f16880985", 6 - "requirements_hash": "sha256:41b3e6ec3cd37289edeb1c134ce836c0dfa7843d7dd3dc28a1b46880d77bf029", 7 - "version_inputs_hash": "sha256:53726e1053a34cced52a7d0c9b2aa679dad94259b51681758674ae4320bbb7a4" 6 + "requirements_hash": "sha256:a08c15387b6f199fe37fad0855c14ffde941d1c0b49f94fa1ed48a9464fab9a6", 7 + "version_inputs_hash": "sha256:58db986b7cd72eeded675f7c9afd8138fe024fb51451131b5562922bbde3cf43" 8 8 }

+5 -5

server/stack/requirements/app-server/pylock.app-server.toml

··· 427 427 428 428 [[packages]] 429 429 name = "packaging" 430 - version = "25.0" 430 + version = "26.0" 431 431 index = "https://pypi.org/simple" 432 432 433 433 [[packages.wheels]] 434 - url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl" 435 - upload-time = 2025-04-19T11:48:57Z 436 - size = 66469 434 + url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl" 435 + upload-time = 2026-01-21T20:50:37Z 436 + size = 74366 437 437 438 438 [packages.wheels.hashes] 439 - sha256 = "29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484" 439 + sha256 = "b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529" 440 440 441 441 [[packages]] 442 442 name = "pathspec"

+3

server/system_prompt.txt modelfiles/mem-agent

··· 1 + FROM driaforall/mem-agent-mlx-4bit 2 + SYSTEM """ 1 3 # Memory Agent System Prompt 2 4 3 5 You are an LLM agent with a self-managed, Obsidian-like memory system. You interact with memory using Python code blocks. ··· 304 306 ## Filtering 305 307 306 308 In the user query, you might receive a fact-retrieval question that includes <filter> tags. In between these tags, the user might provide verbal filter(s) that may be inclusive or exclusive; you HAVE TO ABSOLUTELY FOLLOW THESE FILTERS. These filters provide privacy over user information. If there are no filters, just return the answer as is. 309 + """

+1 -1

tilekit/Cargo.toml

··· 1 1 [package] 2 2 name = "tilekit" 3 - version = "0.1.0" 3 + version = "0.2.0" 4 4 edition = "2024" 5 5 6 6 [dependencies]

+1 -1

tiles/src/commands/mod.rs

··· 8 8 pub use tilekit::optimize::optimize; 9 9 10 10 pub async fn run(runtime: &Runtime, run_args: RunArgs) { 11 - runtime.run(run_args).await; 11 + let _ = runtime.run(run_args).await; 12 12 } 13 13 14 14 pub fn set_memory(path: &str) {

+5

tiles/src/main.rs

··· 51 51 /// Max times cli communicates with the model until it gets a proper reply for a user prompt 52 52 #[arg(short = 'r', long, default_value_t = 10)] 53 53 relay_count: u32, 54 + 55 + /// Switches the mode to memory, used for interacting with memory models. 56 + #[arg(short = 'm', long)] 57 + memory: bool, 54 58 // Future flags go here: 55 59 // #[arg(long, default_value_t = 6969)] 56 60 // port: u16, ··· 98 102 let run_args = RunArgs { 99 103 modelfile_path, 100 104 relay_count: flags.relay_count, 105 + memory: flags.memory, 101 106 }; 102 107 commands::run(&runtime, run_args).await; 103 108 }

+1 -1

tiles/src/runtime/cpu.rs

··· 12 12 pub fn new() -> Self { 13 13 CPURuntime {} 14 14 } 15 - pub async fn run(&self, _run_args: super::RunArgs) { 15 + pub async fn run(&self, _run_args: super::RunArgs) -> Result<()> { 16 16 unimplemented!() 17 17 } 18 18

+38 -15

tiles/src/runtime/mlx.rs

··· 1 1 use crate::runtime::RunArgs; 2 2 use crate::utils::config::{ 3 - create_default_memory_folder, get_config_dir, get_default_memory_path, get_memory_path, 4 - get_server_dir, set_memory_path, 3 + create_default_memory_folder, get_config_dir, get_default_memory_path, get_lib_dir, 4 + get_memory_path, set_memory_path, 5 5 }; 6 6 use crate::utils::hf_model_downloader::*; 7 7 use anyhow::{Context, Result}; ··· 17 17 use serde_json::{Value, json}; 18 18 use std::fs; 19 19 use std::fs::File; 20 + use std::path::PathBuf; 20 21 use std::process::Stdio; 21 22 use std::time::Duration; 22 23 use std::{io, process::Command}; ··· 42 43 MLXRuntime {} 43 44 } 44 45 45 - pub async fn run(&self, run_args: super::RunArgs) { 46 - const DEFAULT_MODELFILE: &str = "FROM driaforall/mem-agent-mlx-4bit"; 47 - //Parse modelfile 46 + pub async fn run(&self, run_args: super::RunArgs) -> Result<()> { 47 + let default_modelfile_path = get_default_modelfile(run_args.memory)?; 48 + let default_modelfile = 49 + tilekit::modelfile::parse_from_file(default_modelfile_path.to_str().unwrap()).unwrap(); 48 50 let modelfile_parse_result = if let Some(modelfile_str) = &run_args.modelfile_path { 49 51 tilekit::modelfile::parse_from_file(modelfile_str.as_str()) 50 52 } else { 51 - tilekit::modelfile::parse(DEFAULT_MODELFILE) 53 + Err("NOT PROVIDED".to_string()) 52 54 }; 53 55 54 56 let modelfile = match modelfile_parse_result { 55 57 Ok(mf) => mf, 58 + Err(err) if err == "NOT PROVIDED" => default_modelfile.clone(), 56 59 Err(_err) => { 57 60 println!("Invalid Modelfile"); 58 - return; 61 + return Ok(()); 59 62 } 60 63 }; 61 64 62 - let _res = run_model_with_server(self, modelfile, &run_args) 65 + run_model_with_server(self, modelfile, default_modelfile, &run_args) 63 66 .await 64 - .inspect_err(|e| eprintln!("Failed to run the model due to {e}")); 67 + .inspect_err(|e| eprintln!("Failed to run the model due to {e}")) 65 68 } 66 69 67 70 #[allow(clippy::zombie_processes)] ··· 76 79 } 77 80 78 81 let 
config_dir = get_config_dir()?; 79 - let mut server_dir = get_server_dir()?; 82 + let mut server_dir = get_lib_dir()?; 80 83 let pid_file = config_dir.join("server.pid"); 81 84 fs::create_dir_all(&config_dir).context("Failed to create config directory")?; 82 - 85 + server_dir = server_dir.join("server"); 83 86 let stdout_log = File::create(config_dir.join("server.out.log"))?; 84 87 let stderr_log = File::create(config_dir.join("server.err.log"))?; 85 88 let server_path = server_dir.join("stack_export_prod/app-server/bin/python"); ··· 257 260 async fn run_model_with_server( 258 261 mlx_runtime: &MLXRuntime, 259 262 modelfile: Modelfile, 263 + default_modelfile: Modelfile, 260 264 run_args: &RunArgs, 261 265 ) -> Result<()> { 262 266 if !cfg!(debug_assertions) { ··· 268 272 // loading the model from mem-agent via daemon server 269 273 let memory_path = get_or_set_memory_path().context("Setting/Retrieving memory_path failed")?; 270 274 let modelname = modelfile.from.as_ref().unwrap(); 271 - match load_model(modelname, &memory_path).await { 275 + match load_model(&modelfile, &default_modelfile, &memory_path).await { 272 276 Ok(_) => start_repl(mlx_runtime, modelname, run_args).await, 273 277 Err(err) => return Err(anyhow::anyhow!(err)), 274 278 } ··· 426 430 } 427 431 } 428 432 429 - async fn load_model(model_name: &str, memory_path: &str) -> Result<()> { 433 + async fn load_model( 434 + modelfile: &Modelfile, 435 + default_modelfile: &Modelfile, 436 + memory_path: &str, 437 + ) -> Result<()> { 430 438 let client = Client::new(); 439 + let model_name = modelfile.from.clone().unwrap(); 431 440 let body = json!({ 432 441 "model": model_name, 433 - "memory_path": memory_path 442 + "memory_path": memory_path, 443 + "system_prompt": modelfile.system.clone().unwrap_or(default_modelfile.system.clone().unwrap()) 434 444 }); 435 445 436 446 let res = client ··· 442 452 StatusCode::OK => Ok(()), 443 453 StatusCode::NOT_FOUND => { 444 454 println!("Downloading {}\n", model_name); 
445 - match pull_model(model_name).await { 455 + match pull_model(&model_name).await { 446 456 Ok(_) => { 447 457 println!("\nDownloading completed \n"); 448 458 Ok(()) ··· 549 559 } 550 560 } 551 561 } 562 + 563 + fn get_default_modelfile(memory_mode: bool) -> Result<PathBuf> { 564 + // get default by the args -m 565 + // let path = 566 + if memory_mode { 567 + let path = get_lib_dir()?.join("modelfiles/mem-agent"); 568 + Ok(path) 569 + } else { 570 + // let path = get_lib_dir()?.join("modelfiles/gpt-oss"); 571 + let path = get_lib_dir()?.join("modelfiles/mem-agent"); 572 + Ok(path) 573 + } 574 + }

+2 -2

tiles/src/runtime/mod.rs

··· 8 8 pub struct RunArgs { 9 9 pub modelfile_path: Option<String>, 10 10 pub relay_count: u32, 11 - // Future flags go here 11 + pub memory: bool, // Future flags go here 12 12 } 13 13 14 14 pub enum Runtime { ··· 17 17 } 18 18 19 19 impl Runtime { 20 - pub async fn run(&self, run_args: RunArgs) { 20 + pub async fn run(&self, run_args: RunArgs) -> Result<()> { 21 21 match self { 22 22 Runtime::Mlx(runtime) => runtime.run(run_args).await, 23 23 Runtime::Cpu(runtime) => runtime.run(run_args).await,

+12 -7

tiles/src/utils/config.rs

··· 60 60 Ok(memory_path) 61 61 } 62 62 63 - pub fn get_server_dir() -> Result<PathBuf> { 63 + pub fn get_lib_dir() -> Result<PathBuf> { 64 64 if cfg!(debug_assertions) { 65 65 let base_dir = env::current_dir().context("Failed to fetch CURRENT_DIR")?; 66 - Ok(base_dir.join("server")) 66 + Ok(base_dir) 67 67 } else { 68 68 let home_dir = env::home_dir().context("Failed to fetch $HOME")?; 69 - let data_dir = match env::var("XDG_DATA_HOME") { 70 - Ok(val) => PathBuf::from(val), 71 - Err(_err) => home_dir.join(".local/share"), 72 - }; 73 - Ok(data_dir.join("tiles/server")) 69 + let data_dir = home_dir.join(".local/lib"); 70 + Ok(data_dir.join("tiles")) 74 71 } 75 72 } 73 + 76 74 pub fn get_config_dir() -> Result<PathBuf> { 77 75 if cfg!(debug_assertions) { 78 76 let base_dir = env::current_dir().context("Failed to fetch CURRENT_DIR")?; ··· 100 98 Ok(data_dir.join("tiles")) 101 99 } 102 100 } 101 + 102 + pub fn is_memory_model(modelname: &str) -> bool { 103 + if modelname.contains("mem") { 104 + return true; 105 + } 106 + false 107 + }

Configure Feed

Configure Feed