feat: add lexicon-based caching and adoption guide

+80 -2

README.md

··· 21 21 22 22 # filter by namespace 23 23 uvx pmgfal -p fm.plyr 24 + 25 + # force regeneration (skip cache) 26 + uvx pmgfal --no-cache 24 27 ``` 25 28 29 + ## caching 30 + 31 + pmgfal caches generated models based on a hash of your lexicon files. on subsequent runs with unchanged lexicons, it copies from cache instead of regenerating. 32 + 33 + cache location: 34 + - macos: `~/Library/Caches/pmgfal/` 35 + - linux: `$XDG_CACHE_HOME/pmgfal/` (defaults to `~/.cache/pmgfal/`) 36 + - windows: `%LOCALAPPDATA%/pmgfal/` 37 + 38 + the cache key includes: 39 + - pmgfal version (cache invalidates on upgrade) 40 + - namespace prefix filter 41 + - file names and content of all lexicon json files 42 + 26 43 ## output 27 44 28 45 ```python ··· 42 59 duration_ms: int | None = Field(default=None, alias="durationMs") 43 60 ``` 44 61 62 + ## adoption guide 63 + 64 + ### 1. add lexicons to your project 65 + 66 + ``` 67 + your-project/ 68 + ├── lexicons/ 69 + │ └── fm/ 70 + │ └── plyr/ 71 + │ ├── track.json 72 + │ ├── like.json 73 + │ └── comment.json 74 + ├── src/ 75 + │ └── models/ 76 + │ └── .gitkeep 77 + └── pyproject.toml 78 + ``` 79 + 80 + ### 2. generate models 81 + 82 + ```bash 83 + uvx pmgfal ./lexicons -o ./src/models -p fm.plyr 84 + ``` 85 + 86 + ### 3. use in your code 87 + 88 + ```python 89 + from src.models.models import FmPlyrTrack, FmPlyrLike 90 + 91 + track = FmPlyrTrack( 92 + uri="at://did:plc:xyz/fm.plyr.track/123", 93 + title="my song", 94 + artist="me", 95 + ) 96 + ``` 97 + 98 + ### 4. regenerate when lexicons change 99 + 100 + add to your build/ci: 101 + 102 + ```bash 103 + uvx pmgfal ./lexicons -o ./src/models -p fm.plyr 104 + ``` 105 + 106 + caching ensures this is fast when lexicons haven't changed. 107 + 108 + ## external refs 109 + 110 + pmgfal bundles all `com.atproto.*` lexicons and automatically resolves external refs. 
for example, if your lexicon references `com.atproto.repo.strongRef`, pmgfal generates: 111 + 112 + ```python 113 + class ComAtprotoRepoStrongRef(BaseModel): 114 + uri: str 115 + cid: str 116 + 117 + class FmPlyrLike(BaseModel): 118 + subject: ComAtprotoRepoStrongRef # properly typed! 119 + created_at: str = Field(alias="createdAt") 120 + ``` 121 + 45 122 ## how it works 46 123 47 124 1. parses lexicon json using [atrium-lex](https://github.com/atrium-rs/atrium) (rust) 48 - 2. generates pydantic v2 models 49 - 3. outputs standalone python - no atproto sdk dependency 125 + 2. resolves internal (`#localDef`) and external (`com.atproto.*`) refs 126 + 3. generates pydantic v2 models with field aliases 127 + 4. outputs standalone python - no atproto sdk dependency

+65 -2

python/pmgfal/__init__.py

··· 3 3 from __future__ import annotations 4 4 5 5 import argparse 6 + import hashlib 7 + import os 8 + import shutil 6 9 import sys 7 10 from pathlib import Path 8 11 9 12 from pmgfal._pmgfal import __version__, generate 10 13 11 - __all__ = ["__version__", "generate", "main"] 14 + __all__ = ["__version__", "generate", "main", "get_cache_dir"] 15 + 16 + 17 + def get_cache_dir() -> Path: 18 + """get the user cache directory for pmgfal.""" 19 + if sys.platform == "darwin": 20 + base = Path.home() / "Library" / "Caches" 21 + elif sys.platform == "win32": 22 + base = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local")) 23 + else: 24 + base = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache")) 25 + return base / "pmgfal" 26 + 27 + 28 + def hash_lexicons(lexicon_dir: Path, prefix: str | None = None) -> str: 29 + """compute a hash of all lexicon files in a directory.""" 30 + hasher = hashlib.sha256() 31 + 32 + # include version in hash so cache invalidates on upgrades 33 + hasher.update(__version__.encode()) 34 + 35 + # include prefix in hash 36 + if prefix: 37 + hasher.update(prefix.encode()) 38 + 39 + # hash all json files in sorted order for determinism 40 + json_files = sorted(lexicon_dir.rglob("*.json")) 41 + for path in json_files: 42 + hasher.update(path.name.encode()) 43 + hasher.update(path.read_bytes()) 44 + 45 + return hasher.hexdigest()[:16] 12 46 13 47 14 48 def main(args: list[str] | None = None) -> int: ··· 38 72 help="namespace prefix filter (e.g. 
'fm.plyr')", 39 73 ) 40 74 parser.add_argument( 75 + "--no-cache", 76 + action="store_true", 77 + help="force regeneration, ignoring cache", 78 + ) 79 + parser.add_argument( 41 80 "-V", 42 81 "--version", 43 82 action="version", ··· 60 99 return 1 61 100 62 101 try: 102 + # compute hash of lexicons 103 + lexicon_hash = hash_lexicons(lexicon_dir, parsed.prefix) 104 + cache_dir = get_cache_dir() / lexicon_hash 105 + 106 + # check cache 107 + if not parsed.no_cache and cache_dir.exists(): 108 + # cache hit - copy cached files to output 109 + parsed.output.mkdir(parents=True, exist_ok=True) 110 + cached_files = list(cache_dir.glob("*.py")) 111 + for cached in cached_files: 112 + dest = parsed.output / cached.name 113 + shutil.copy2(cached, dest) 114 + print(f"cache hit ({lexicon_hash}) - copied {len(cached_files)} file(s):") 115 + for f in cached_files: 116 + print(f" {parsed.output / f.name}") 117 + return 0 118 + 119 + # cache miss - generate 63 120 files = generate( 64 121 str(lexicon_dir), 65 122 str(parsed.output), 66 123 parsed.prefix, 67 124 ) 68 - print(f"generated {len(files)} file(s):") 125 + 126 + # store in cache 127 + cache_dir.mkdir(parents=True, exist_ok=True) 128 + for f in files: 129 + shutil.copy2(f, cache_dir / Path(f).name) 130 + 131 + print(f"generated {len(files)} file(s) (cached as {lexicon_hash}):") 69 132 for f in files: 70 133 print(f" {f}") 71 134 return 0

+69

tests/test_generate.py

··· 251 251 assert "class ComAtprotoRepoStrongRef(BaseModel):" in content 252 252 assert "uri: str" in content 253 253 assert "cid: str" in content 254 + 255 + 256 + class TestCaching: 257 + """test caching behavior.""" 258 + 259 + def test_cache_hit(self): 260 + """second run should hit cache.""" 261 + from pmgfal import main 262 + 263 + lexicon = { 264 + "lexicon": 1, 265 + "id": "test.cache", 266 + "defs": { 267 + "main": { 268 + "type": "record", 269 + "record": {"type": "object", "properties": {"x": {"type": "string"}}}, 270 + } 271 + }, 272 + } 273 + 274 + with tempfile.TemporaryDirectory() as tmpdir: 275 + lexicon_dir = Path(tmpdir) / "lexicons" 276 + lexicon_dir.mkdir() 277 + (lexicon_dir / "cache.json").write_text(json.dumps(lexicon)) 278 + 279 + output_dir = Path(tmpdir) / "generated" 280 + 281 + # first run - cache miss 282 + result = main([str(lexicon_dir), "-o", str(output_dir)]) 283 + assert result == 0 284 + assert (output_dir / "models.py").exists() 285 + 286 + # delete output to prove cache works 287 + (output_dir / "models.py").unlink() 288 + 289 + # second run - cache hit 290 + result = main([str(lexicon_dir), "-o", str(output_dir)]) 291 + assert result == 0 292 + assert (output_dir / "models.py").exists() 293 + 294 + def test_no_cache_flag(self): 295 + """--no-cache should force regeneration.""" 296 + from pmgfal import main 297 + 298 + lexicon = { 299 + "lexicon": 1, 300 + "id": "test.nocache", 301 + "defs": { 302 + "main": { 303 + "type": "record", 304 + "record": {"type": "object", "properties": {"y": {"type": "string"}}}, 305 + } 306 + }, 307 + } 308 + 309 + with tempfile.TemporaryDirectory() as tmpdir: 310 + lexicon_dir = Path(tmpdir) / "lexicons" 311 + lexicon_dir.mkdir() 312 + (lexicon_dir / "nocache.json").write_text(json.dumps(lexicon)) 313 + 314 + output_dir = Path(tmpdir) / "generated" 315 + 316 + # first run 317 + result = main([str(lexicon_dir), "-o", str(output_dir)]) 318 + assert result == 0 319 + 320 + # second run with 
--no-cache 321 + result = main([str(lexicon_dir), "-o", str(output_dir), "--no-cache"]) 322 + assert result == 0

Configure Feed

Configure Feed