···11+# model pkg command; run when the model changes, or when a local copy is needed for the final pkg
22+MODELS_VERSION=1.0
33+pkgbuild --root pkgroot_models --identifier com.tilesprivacy.tiles_models --version "$MODELS_VERSION" tiles-model.pkg
+42
pkg/pkg_building.md
···11+## How the Tiles pkgs are built
22+33+### Network Installer
44+55+The network installer is basically Tiles without any ML models included in it.
66+So when a model is needed, Tiles will download it. (Maybe in a later version
77+a user should be able to download it from local peers too.)
88+99+```
1010+just bundle_pkg
1111+```
1212+1313+Creates tiles-<VERSION>.pkg, then signs and notarizes it
1414+1515+1616+### Offline Installer
1717+1818+The offline installer also includes the default model, so once
1919+downloaded it provides a portable installer that can work w/o
2020+internet forever and ever...
2121+2222+```
2323+just bundle_model_pkg
2424+2525+```
2626+2727+This will bundle only the model in a .pkg.
2828+2929+> We run this command only when a model is updated, added, etc.,
3030+since this is a time-consuming process and does not need to run
3131+in every release build.
3232+3333+The basic approach we will take for building the offline installer is that
3434+we essentially build 2 pkgs: the network installer and a pkg with
3535+only models. Then we create a final package that contains these 2 pkgs with
3636+the command below.
3737+3838+3939+```
4040+just bundle_pkg_full
4141+4242+```
+2-1
server/api.py
···4747async def start_model(request: StartRequest):
4848 """Load the model and start the agent"""
4949 global _messages, _runner, _memory_path
5050+ print(f"CACHE PATH{request.model_cache_path}")
50515152 _messages = [ChatMessage(role="system", content=request.system_prompt)]
5253 _memory_path = request.memory_path
5354 logger.info(f"{runtime.backend}")
5454- runtime.backend.get_or_load_model(request.model)
5555+ runtime.backend.get_or_load_model(request.model, request.model_cache_path)
5556 return {"message": "Model loaded"}
56575758
+36-48
server/backend/mlx.py
···33import time
44import uuid
55from collections.abc import AsyncGenerator
66-66+from pathlib import Path
77from fastapi import HTTPException
88from openai_harmony import (
99 Conversation,
···5454 raise HTTPException(status_code=400, detail="Downloading model failed")
555556565757-def get_or_load_model(model_spec: str, verbose: bool = True) -> MLXRunner:
5757+def get_or_load_model(
5858+ model_spec: str, model_cache_path: str | None = None, verbose: bool = True
5959+) -> MLXRunner:
5860 """Get model from cache or load it if not cached."""
5961 global _model_cache, _current_model_path
6060-6161- # Use the existing model path resolution from cache_utils
6262-6363- try:
6464- model_path, model_name, commit_hash = get_model_path(model_spec)
6565- if not model_path.exists():
6666- logger.info(f"Model {model_spec} not found in cache")
6767- raise HTTPException(
6868- status_code=404, detail=f"Model {model_spec} not found in cache"
6969- )
7070- except Exception as e:
7171- logger.info(f"Model {model_spec} not found in: {str(e)}")
7272- raise HTTPException(
7373- status_code=404, detail=f"Model {model_spec} not found: {str(e)}"
7474- )
7575-7676- # Check if it's an MLX model
7777-7878- model_path_str = str(model_path)
7979-8080- # Check if we need to load a different model
8181- if _current_model_path != model_path_str:
8282- # Proactively clean up any previously loaded runner to release memory
8383- if _model_cache:
8484- try:
8585- for _old_runner in list(_model_cache.values()):
8686- try:
8787- _old_runner.cleanup()
8888- except Exception:
8989- pass
9090- finally:
9191- _model_cache.clear()
6262+ model_name = model_spec
6363+ if isinstance(model_cache_path, str):
6464+ model_path_str = model_cache_path
6565+ # Check if we need to load a different model
6666+ if _current_model_path != model_path_str:
6767+ # Proactively clean up any previously loaded runner to release memory
6868+ if _model_cache:
6969+ try:
7070+ for _old_runner in list(_model_cache.values()):
7171+ try:
7272+ _old_runner.cleanup()
7373+ except Exception:
7474+ pass
7575+ finally:
7676+ _model_cache.clear()
92779393- # Load new model
9494- if verbose:
9595- print(f"Loading model: {model_name}")
7878+ # Load new model
7979+ if verbose:
8080+ print(f"Loading model: {model_name}")
96819797- logger.info(f"Loading model: {model_name}")
9898- runner = MLXRunner(model_path_str, verbose=verbose)
9999- runner.load_model()
8282+ logger.info(f"Loading model: {model_name}")
8383+ runner = MLXRunner(model_path_str, verbose=verbose)
8484+ runner.load_model()
10085101101- _model_cache[model_path_str] = runner
102102- _current_model_path = model_path_str
8686+ _model_cache[model_path_str] = runner
8787+ _current_model_path = model_path_str
8888+ return runner
8989+ else:
9090+ logger.info(f"Model {model_name} already in memory")
9191+ return _model_cache[_current_model_path] # pyright: ignore
10392 else:
104104- logger.info(f"Model {model_name} already in memory")
105105-106106- return _model_cache[model_path_str]
9393+ logger.info(f"Model Path {_current_model_path} already in memory")
9494+ return _model_cache[_current_model_path] # pyright: ignore
107951089610997async def generate_chat_stream(
···114102 _messages = messages
115103 completion_id = f"chatcmpl-{uuid.uuid4()}"
116104 created = int(time.time())
117117- runner = get_or_load_model(request.model)
105105+ runner = get_or_load_model(request.model, None)
118106 if request.chat_start:
119107 _messages.extend(request.messages)
120108 # Convert messages to dict format for runner
···312300 """Generate streaming chat responses for OpenResponses API."""
313301 model = request.model
314302 created = int(time.time())
315315- runner = get_or_load_model(model)
303303+ runner = get_or_load_model(model, None)
316304 metrics = None
317305318306 user_input_content = ""
···491479 response_id = f"resp-{uuid.uuid4()}"
492480 msg_id = f"msg_{uuid.uuid4()}"
493481 created = int(time.time())
494494- runner = get_or_load_model(model)
482482+ runner = get_or_load_model(model, None)
495483496484 user_input_content = ""
497485