Merge pull request #101 from tilesprivacy/feat/bundle-model

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

A local-first private AI assistant for everyday use. Runs on-device models with encrypted P2P sync, and supports sharing chats publicly on ATProto.

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge pull request #101 from tilesprivacy/feat/bundle-model

Bundle models for the offline installer

authored by

Anandu Pavanan and committed by

GitHub 2 months ago b5badba3 fb01df5a

+251 -109

13 changed files

expand all collapse all

.gitignore

justfile

modelfiles

qwen

pkg

build.sh

build_full.sh

build_model.sh

pkg_building.md

server

api.py

backend

mlx.py

schemas.py

tiles

src

runtime

mlx.rs

utils

config.rs

hf_model_downloader.rs

.gitignore

reviewed

··· 8 8 .DS_Store 9 9 pkgroot/ 10 10 *.pkg 11 11 + models/ 11 12 pkgroot_models/

justfile

reviewed

··· 23 23 24 24 bundle_pkg: 25 25 ./pkg/build.sh 26 26 + 27 27 + bundle_model_pkg: 28 28 + ./pkg/build_model.sh 29 29 + 30 30 + bundle_pkg_full: 31 31 + ./pkg/build.sh 32 32 + ./pkg/build_full.sh

modelfiles/qwen

reviewed

··· 1 1 FROM mlx-community/Qwen3.5-4B-MLX-4bit 2 2 + # FROM mlx-community/Qwen3-0.6B-4bit

+9 -6

pkg/build.sh

reviewed

··· 8 8 MODELFILE_DIR="modelfiles" 9 9 SERVER_DIR="server" 10 10 BINARY_NAME="tiles" 11 11 - 11 11 + MODELS_DIR="models" 12 12 VERSION=$(grep '^version' tiles/Cargo.toml | head -1 | awk -F'"' '{print $2}') 13 13 OS=$(uname -s | tr '[:upper:]' '[:lower:]') 14 14 ARCH=$(uname -m) ··· 71 71 72 72 73 73 # Creating .pkg 74 74 - pkgbuild --root pkgroot --scripts pkg/scripts --identifier com.tilesprivacy.tiles --version "$VERSION" "tiles-${VERSION}".pkg 74 74 + pkgbuild --root pkgroot --scripts pkg/scripts --identifier com.tilesprivacy.tiles --version "$VERSION" "tiles-${VERSION}-unsigned".pkg 75 75 76 76 77 77 # signing 78 78 productsign \ 79 79 --sign "$DEVELOPER_ID_INSTALLER" \ 80 80 - "tiles-${VERSION}.pkg" \ 81 81 - "tiles-${VERSION}-signed.pkg" 80 80 + "tiles-${VERSION}-unsigned.pkg" \ 81 81 + "tiles-${VERSION}.pkg" 82 82 + 83 83 + rm "tiles-${VERSION}-unsigned.pkg" 82 84 83 85 # notarizing 84 84 - xcrun notarytool submit "tiles-${VERSION}-signed.pkg"\ 86 86 + xcrun notarytool submit "tiles-${VERSION}.pkg"\ 85 87 --keychain-profile "tiles-notary-profile" \ 86 88 --wait 87 89 88 90 # staple the approval ticket to pkg 89 89 - xcrun stapler staple "tiles-${VERSION}-signed.pkg" 91 91 + xcrun stapler staple "tiles-${VERSION}.pkg" 92 92 +

+25

pkg/build_full.sh

reviewed

··· 1 1 + #!/usr/bin/env bash 2 2 + 3 3 + set -euo pipefail 4 4 + 5 5 + VERSION=$(grep '^version' tiles/Cargo.toml | head -1 | awk -F'"' '{print $2}') 6 6 + 7 7 + # bundling the models 8 8 + productbuild --package "tiles-${VERSION}".pkg --package tiles-model.pkg "tiles-${VERSION}-full-unsigned".pkg 9 9 + 10 10 + 11 11 + # signing 12 12 + productsign \ 13 13 + --sign "$DEVELOPER_ID_INSTALLER" \ 14 14 + "tiles-${VERSION}-full-unsigned.pkg" \ 15 15 + "tiles-${VERSION}-full.pkg" 16 16 + 17 17 + # notarizing 18 18 + xcrun notarytool submit "tiles-${VERSION}-full.pkg"\ 19 19 + --keychain-profile "tiles-notary-profile" \ 20 20 + --wait 21 21 + 22 22 + # staple the approval ticket to pkg 23 23 + xcrun stapler staple "tiles-${VERSION}-full.pkg" 24 24 + 25 25 +

pkg/build_model.sh

reviewed

··· 1 1 + # model pkg command, run when model changes, or need a local copy for final pkg 2 2 + MODELS_VERSION=1.0 3 3 + pkgbuild --root pkgroot_models --identifier com.tilesprivacy.tiles_models --version "$MODELS_VERSION" tiles-model.pkg

+42

pkg/pkg_building.md

reviewed

··· 1 1 + ## How the Tiles pkgs are built 2 2 + 3 3 + ### Network Installer 4 4 + 5 5 + Network installer is basically Tiles without any ML models included in it. 6 6 + So when a model is needed, Tiles will download it. (Maybe in a later version 7 7 + a user should be able to download from its peers locally too). 8 8 + 9 9 + ``` 10 10 + just bundle_pkg 11 11 + ``` 12 12 + 13 13 + Creates tiles-<VERSION>.pkg, signs and notarizes it 14 14 + 15 15 + 16 16 + ### Offline Installer 17 17 + 18 18 + Offline Installer includes the default model too in it, so once 19 19 + downloaded provides a portable installer, and can work w/o 20 20 + internet forever and ever... 21 21 + 22 22 + ``` 23 23 + just bundle_model_pkg 24 24 + 25 25 + ``` 26 26 + 27 27 + This will bundle only the model in a .pkg. 28 28 + 29 29 + > We run this command only when a model is updated/added etc. 30 30 + Since this is a time-consuming process and does not need to run 31 31 + in every release build 32 32 + 33 33 + The basic approach we will take for offline installer building is that 34 34 + we build 2 pkgs essentially, the network installer and a pkg with 35 35 + only models. Then we create a final package that has these 2 pkgs with 36 36 + the command below. 37 37 + 38 38 + 39 39 + ``` 40 40 + just bundle_pkg_full 41 41 + 42 42 + ```

+2 -1

server/api.py

reviewed

··· 47 47 async def start_model(request: StartRequest): 48 48 """Load the model and start the agent""" 49 49 global _messages, _runner, _memory_path 50 50 + print(f"CACHE PATH{request.model_cache_path}") 50 51 51 52 _messages = [ChatMessage(role="system", content=request.system_prompt)] 52 53 _memory_path = request.memory_path 53 54 logger.info(f"{runtime.backend}") 54 54 - runtime.backend.get_or_load_model(request.model) 55 55 + runtime.backend.get_or_load_model(request.model, request.model_cache_path) 55 56 return {"message": "Model loaded"} 56 57 57 58

+36 -48

server/backend/mlx.py

reviewed

··· 3 3 import time 4 4 import uuid 5 5 from collections.abc import AsyncGenerator 6 6 - 6 6 + from pathlib import Path 7 7 from fastapi import HTTPException 8 8 from openai_harmony import ( 9 9 Conversation, ··· 54 54 raise HTTPException(status_code=400, detail="Downloading model failed") 55 55 56 56 57 57 - def get_or_load_model(model_spec: str, verbose: bool = True) -> MLXRunner: 57 57 + def get_or_load_model( 58 58 + model_spec: str, model_cache_path: str | None = None, verbose: bool = True 59 59 + ) -> MLXRunner: 58 60 """Get model from cache or load it if not cached.""" 59 61 global _model_cache, _current_model_path 60 60 - 61 61 - # Use the existing model path resolution from cache_utils 62 62 - 63 63 - try: 64 64 - model_path, model_name, commit_hash = get_model_path(model_spec) 65 65 - if not model_path.exists(): 66 66 - logger.info(f"Model {model_spec} not found in cache") 67 67 - raise HTTPException( 68 68 - status_code=404, detail=f"Model {model_spec} not found in cache" 69 69 - ) 70 70 - except Exception as e: 71 71 - logger.info(f"Model {model_spec} not found in: {str(e)}") 72 72 - raise HTTPException( 73 73 - status_code=404, detail=f"Model {model_spec} not found: {str(e)}" 74 74 - ) 75 75 - 76 76 - # Check if it's an MLX model 77 77 - 78 78 - model_path_str = str(model_path) 79 79 - 80 80 - # Check if we need to load a different model 81 81 - if _current_model_path != model_path_str: 82 82 - # Proactively clean up any previously loaded runner to release memory 83 83 - if _model_cache: 84 84 - try: 85 85 - for _old_runner in list(_model_cache.values()): 86 86 - try: 87 87 - _old_runner.cleanup() 88 88 - except Exception: 89 89 - pass 90 90 - finally: 91 91 - _model_cache.clear() 62 62 + model_name = model_spec 63 63 + if isinstance(model_cache_path, str): 64 64 + model_path_str = model_cache_path 65 65 + # Check if we need to load a different model 66 66 + if _current_model_path != model_path_str: 67 67 + # Proactively clean up any previously loaded 
runner to release memory 68 68 + if _model_cache: 69 69 + try: 70 70 + for _old_runner in list(_model_cache.values()): 71 71 + try: 72 72 + _old_runner.cleanup() 73 73 + except Exception: 74 74 + pass 75 75 + finally: 76 76 + _model_cache.clear() 92 77 93 93 - # Load new model 94 94 - if verbose: 95 95 - print(f"Loading model: {model_name}") 78 78 + # Load new model 79 79 + if verbose: 80 80 + print(f"Loading model: {model_name}") 96 81 97 97 - logger.info(f"Loading model: {model_name}") 98 98 - runner = MLXRunner(model_path_str, verbose=verbose) 99 99 - runner.load_model() 82 82 + logger.info(f"Loading model: {model_name}") 83 83 + runner = MLXRunner(model_path_str, verbose=verbose) 84 84 + runner.load_model() 100 85 101 101 - _model_cache[model_path_str] = runner 102 102 - _current_model_path = model_path_str 86 86 + _model_cache[model_path_str] = runner 87 87 + _current_model_path = model_path_str 88 88 + return runner 89 89 + else: 90 90 + logger.info(f"Model {model_name} already in memory") 91 91 + return _model_cache[_current_model_path] # pyright: ignore 103 92 else: 104 104 - logger.info(f"Model {model_name} already in memory") 105 105 - 106 106 - return _model_cache[model_path_str] 93 93 + logger.info(f"Model Path {_current_model_path} already in memory") 94 94 + return _model_cache[_current_model_path] # pyright: ignore 107 95 108 96 109 97 async def generate_chat_stream( ··· 114 102 _messages = messages 115 103 completion_id = f"chatcmpl-{uuid.uuid4()}" 116 104 created = int(time.time()) 117 117 - runner = get_or_load_model(request.model) 105 105 + runner = get_or_load_model(request.model, None) 118 106 if request.chat_start: 119 107 _messages.extend(request.messages) 120 108 # Convert messages to dict format for runner ··· 312 300 """Generate streaming chat responses for OpenResponses API.""" 313 301 model = request.model 314 302 created = int(time.time()) 315 315 - runner = get_or_load_model(model) 303 303 + runner = get_or_load_model(model, None) 316 
304 metrics = None 317 305 318 306 user_input_content = "" ··· 491 479 response_id = f"resp-{uuid.uuid4()}" 492 480 msg_id = f"msg_{uuid.uuid4()}" 493 481 created = int(time.time()) 494 494 - runner = get_or_load_model(model) 482 482 + runner = get_or_load_model(model, None) 495 483 496 484 user_input_content = "" 497 485

server/schemas.py

reviewed

··· 81 81 model: str 82 82 memory_path: str 83 83 system_prompt: str 84 84 + model_cache_path: str 84 85 85 86 86 87 class downloadRequest(BaseModel):

+46 -31

tiles/src/runtime/mlx.rs

reviewed

··· 2 2 use crate::core::chats::{Message, save_chat}; 3 3 use crate::core::storage::db::get_db_conn; 4 4 use crate::runtime::RunArgs; 5 5 - use crate::utils::config::{ConfigProvider, DefaultProvider, get_memory_path}; 5 5 + use crate::utils::config::{ConfigProvider, DefaultProvider, get_memory_path, get_model_cache}; 6 6 use crate::utils::hf_model_downloader::*; 7 7 use anyhow::{Context, Result, anyhow}; 8 8 use futures_util::StreamExt; ··· 90 90 } 91 91 }; 92 92 93 93 - run_model_with_server(self, modelfile, default_modelfile, &run_args) 94 94 - .await 95 95 - .inspect_err(|e| eprintln!("Failed to run the model due to {e}")) 93 93 + run_model_with_server(self, modelfile, default_modelfile, &run_args).await 96 94 } 97 95 98 96 #[allow(clippy::zombie_processes)] ··· 404 402 default_modelfile: &Modelfile, 405 403 memory_path: &str, 406 404 ) -> Result<()> { 407 407 - let client = Client::new(); 408 405 let model_name = modelfile.from.clone().unwrap(); 409 409 - let body = json!({ 410 410 - "model": model_name, 411 411 - "memory_path": memory_path, 412 412 - "system_prompt": modelfile.system.clone().unwrap_or(default_modelfile.system.clone().unwrap_or("".to_owned())) 413 413 - }); 414 406 415 415 - let res = client 416 416 - .post("http://127.0.0.1:6969/start") 417 417 - .json(&body) 418 418 - .send() 419 419 - .await?; 420 420 - match res.status() { 421 421 - StatusCode::OK => Ok(()), 422 422 - StatusCode::NOT_FOUND => { 423 423 - println!("Downloading {}\n", model_name); 424 424 - match pull_model(&model_name).await { 425 425 - Ok(_) => { 426 426 - println!("\nDownloading completed \n"); 427 427 - Ok(()) 428 428 - } 429 429 - Err(err) => Err(anyhow::anyhow!(format!("Download failed due to {:?}", err))), 430 430 - } 431 431 - } 432 432 - _ => Err(anyhow::anyhow!(format!( 433 433 - "Failed to load model {} due to {:?}", 434 434 - model_name, res 435 435 - ))), 407 407 + if let Ok(model_cache_path) = get_model_cache(&model_name) { 408 408 + load_model_in_py(modelfile, 
default_modelfile, memory_path, &model_cache_path).await 409 409 + } else { 410 410 + download_model(&model_name).await?; 411 411 + let model_cache_path = get_model_cache(&model_name)?; 412 412 + load_model_in_py(modelfile, default_modelfile, memory_path, &model_cache_path).await 436 413 } 437 414 } 438 415 ··· 649 626 vec![dev_msg, input] 650 627 } 651 628 } 629 629 + 630 630 + async fn load_model_in_py( 631 631 + modelfile: &Modelfile, 632 632 + default_modelfile: &Modelfile, 633 633 + memory_path: &str, 634 634 + model_cache_path: &PathBuf, 635 635 + ) -> Result<()> { 636 636 + let client = Client::new(); 637 637 + let model_name = modelfile.from.clone().unwrap(); 638 638 + let body = json!({ 639 639 + "model": model_name, 640 640 + "memory_path": memory_path, 641 641 + "model_cache_path": model_cache_path, 642 642 + "system_prompt": modelfile.system.clone().unwrap_or(default_modelfile.system.clone().unwrap_or("".to_owned())) 643 643 + }); 644 644 + let res = client 645 645 + .post("http://127.0.0.1:6969/start") 646 646 + .json(&body) 647 647 + .send() 648 648 + .await?; 649 649 + match res.status() { 650 650 + StatusCode::OK => Ok(()), 651 651 + _ => Err(anyhow::anyhow!(format!( 652 652 + "Failed to load model {} due to {:?}", 653 653 + model_name, res 654 654 + ))), 655 655 + } 656 656 + } 657 657 + 658 658 + async fn download_model(model_name: &str) -> Result<()> { 659 659 + match pull_model(model_name).await { 660 660 + Ok(_) => { 661 661 + println!("\nDownloading completed \n"); 662 662 + Ok(()) 663 663 + } 664 664 + Err(err) => Err(anyhow::anyhow!(format!("Download failed due to {:?}", err))), 665 665 + } 666 666 + }

+69 -1

tiles/src/utils/config.rs

reviewed

··· 12 12 /// - /usr/local/share/tiles (lib dir) - Some internal App files, libraries etc go here.. 13 13 /// - /modelfiles 14 14 /// - /server 15 15 + /// - /models - Where the pre-downloaded models. 15 16 use anyhow::{Context, Result, anyhow}; 16 17 use std::fs::File; 17 18 use std::path::PathBuf; 18 19 use std::str::FromStr; 20 20 + use std::time::SystemTime; 19 21 use std::{env, fs}; 20 22 use toml::Table; 21 23 24 24 + const MODEL_SUB_PATH: &str = "models/huggingface/hub"; 22 25 pub trait ConfigProvider { 23 26 fn get_config_dir(&self) -> Result<PathBuf>; 24 27 fn get_or_create_config_dir(&self) -> Result<PathBuf>; ··· 115 118 fn get_lib_dir(&self) -> Result<PathBuf> { 116 119 if cfg!(debug_assertions) { 117 120 let base_dir = env::current_dir().context("Failed to fetch CURRENT_DIR")?; 118 118 - Ok(base_dir) 121 121 + Ok(base_dir.join(".tiles_dev/tiles")) 119 122 } else { 120 123 let data_dir = PathBuf::from_str("/usr/local/share")?; 121 124 Ok(data_dir.join("tiles")) ··· 229 232 fs::copy(&tmp_path, &config_path)?; 230 233 fs::remove_file(tmp_path)?; 231 234 Ok(()) 235 235 + } 236 236 + 237 237 + // Get the apt path where the model lies 238 238 + pub fn get_model_cache(model_name: &str) -> Result<PathBuf> { 239 239 + let hf_model_dir = if model_name.starts_with("mlx-community/") { 240 240 + let model_spec_parts = model_name.split("/").collect::<Vec<&str>>(); 241 241 + format!("models--{}--{}", model_spec_parts[0], model_spec_parts[1]) 242 242 + } else { 243 243 + return Err(anyhow!("Not implemented for non-mlx models")); 244 244 + }; 245 245 + 246 246 + let lib_dir = DefaultProvider.get_lib_dir()?; 247 247 + let pre_downloaded_model_path = lib_dir.join(MODEL_SUB_PATH).join(&hf_model_dir); 248 248 + let data_dir = DefaultProvider.get_user_data_dir()?; 249 249 + let user_data_dir_model_path = data_dir.join(MODEL_SUB_PATH).join(&hf_model_dir); 250 250 + 251 251 + let legacy_model_path = PathBuf::from(format!( 252 252 + "{}/.cache/huggingface/hub", 253 253 + 
env::home_dir().unwrap().to_str().unwrap() 254 254 + )) 255 255 + .join(&hf_model_dir); 256 256 + 257 257 + if pre_downloaded_model_path.exists() { 258 258 + get_commit_path(pre_downloaded_model_path) 259 259 + } else if user_data_dir_model_path.exists() { 260 260 + get_commit_path(user_data_dir_model_path) 261 261 + } else if legacy_model_path.exists() { 262 262 + get_commit_path(legacy_model_path) 263 263 + } else { 264 264 + Err(anyhow!("Model doesnt exist")) 265 265 + } 266 266 + } 267 267 + 268 268 + fn get_commit_path(base_path: PathBuf) -> Result<PathBuf> { 269 269 + let mut snapshots: Vec<(PathBuf, SystemTime)> = vec![]; 270 270 + let snapshot_path = base_path.join("snapshots"); 271 271 + if snapshot_path.exists() { 272 272 + for entry in snapshot_path.read_dir()? { 273 273 + if let Ok(item) = entry 274 274 + && item.path().is_dir() 275 275 + { 276 276 + snapshots.push((item.path(), item.path().metadata()?.modified()?)); 277 277 + } 278 278 + } 279 279 + if snapshots.is_empty() { 280 280 + Ok(base_path) 281 281 + } else { 282 282 + let latest_snapshot = snapshots 283 283 + .iter() 284 284 + .max_by_key(|a| a.1) 285 285 + .expect("Failed fetching latest snapshot"); 286 286 + Ok(latest_snapshot.0.clone()) 287 287 + } 288 288 + } else { 289 289 + Ok(base_path) 290 290 + } 291 291 + } 292 292 + 293 293 + pub fn get_or_create_model_download_path() -> Result<PathBuf> { 294 294 + let data_dir = DefaultProvider.get_user_data_dir()?; 295 295 + let model_dir = data_dir.join(MODEL_SUB_PATH); 296 296 + if !model_dir.exists() { 297 297 + fs::create_dir_all(&model_dir)?; 298 298 + } 299 299 + Ok(model_dir) 232 300 } 233 301 234 302 //TODO: Add more tests for config.toml

+9 -22

tiles/src/utils/hf_model_downloader.rs

reviewed

··· 1 1 /// Manages model snapshot downloading from HuggingFace 2 2 - use std::{env, path::PathBuf}; 3 3 - 2 2 + use anyhow::{Result, anyhow}; 4 3 use hf_hub::api::{ 5 4 Siblings, 6 5 tokio::{ApiBuilder, ApiError}, 7 6 }; 8 7 8 8 + use crate::utils::config::get_or_create_model_download_path; 9 9 + 9 10 /// Download the entire model (including snapshot) for the given model name 10 10 - pub async fn pull_model(model_name: &str) -> Result<(), String> { 11 11 + pub async fn pull_model(model_name: &str) -> Result<()> { 11 12 snapshot_download(model_name).await 12 13 } 13 14 14 14 - pub async fn snapshot_download(modelname: &str) -> Result<(), String> { 15 15 + pub async fn snapshot_download(modelname: &str) -> Result<()> { 15 16 let allow_patterns = [ 16 17 ".json", 17 18 ".txt", ··· 22 23 ]; 23 24 let api_build_result = ApiBuilder::new() 24 25 .with_progress(true) 25 25 - .with_cache_dir(PathBuf::from(get_model_cache())) 26 26 + .with_cache_dir(get_or_create_model_download_path()?) 26 27 .build(); 27 28 28 29 match api_build_result { ··· 42 43 43 44 for sibling in filtered_siblings { 44 45 if repo.get(&sibling.rfilename).await.is_err() { 45 45 - return Err(format!( 46 46 + return Err(anyhow!( 46 47 "{:?} failed to download, retry again", 47 48 &sibling.rfilename, 48 49 )); 49 50 } 50 51 } 51 52 } 52 52 - Err(err) => return Err(format_hf_api_error(err)), 53 53 + Err(err) => return Err(anyhow!(format_hf_api_error(err))), 53 54 }; 54 55 } 55 55 - Err(err) => return Err(format_hf_api_error(err)), 56 56 + Err(err) => return Err(anyhow!(format_hf_api_error(err))), 56 57 } 57 58 58 59 Ok(()) ··· 64 65 ApiError::TooManyRetries(err) => err.to_string(), 65 66 _err => "Something unexpected happened, check your internet connection".to_owned(), 66 67 } 67 67 - } 68 68 - 69 69 - fn get_model_cache() -> String { 70 70 - let default_cache = format!( 71 71 - "{}/.cache/huggingface", 72 72 - env::home_dir().unwrap().to_str().unwrap() 73 73 - ); 74 74 - let cache_root = if let Ok(home) = 
env::var("HF_HOME") { 75 75 - home.to_owned() 76 76 - } else { 77 77 - default_cache 78 78 - }; 79 79 - 80 80 - format!("{}/hub", cache_root) 81 68 } 82 69 83 70 #[cfg(test)]