Merge branch 'hopper-q6ot7shw-stats-cache-fixes'

+103 -5

2 changed files

expand all

tests

test_journal_stats.py

think

journal_stats.py

+61

tests/test_journal_stats.py

··· 221 221 assert js3.days["20240101"]["transcript_sessions"] == 1 222 222 223 223 224 + def test_facet_event_mtime_invalidates_cache(tmp_path, monkeypatch): 225 + """Modifying a facet event file invalidates that day's cache.""" 226 + stats_mod = importlib.import_module("think.journal_stats") 227 + journal = tmp_path 228 + day = journal / "20240101" 229 + day.mkdir() 230 + 231 + # Create minimal day content 232 + ts_dir = day / "default" / "123456_300" 233 + ts_dir.mkdir(parents=True) 234 + (ts_dir / "audio.jsonl").write_text( 235 + '{"raw": "raw.flac"}\n' 236 + '{"start": "10:00:00", "text": "hello"}\n' 237 + ) 238 + 239 + # Create facet event file 240 + events_dir = journal / "facets" / "work" / "events" 241 + events_dir.mkdir(parents=True) 242 + event = { 243 + "type": "meeting", 244 + "start": "00:00:00", 245 + "end": "00:05:00", 246 + "title": "t", 247 + "summary": "s", 248 + "work": True, 249 + "participants": [], 250 + "details": "", 251 + "facet": "work", 252 + "agent": "meetings", 253 + "occurred": True, 254 + "source": "20240101/agents/meetings.md", 255 + } 256 + (events_dir / "20240101.jsonl").write_text(json.dumps(event)) 257 + 258 + monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(journal)) 259 + 260 + # First scan - creates cache 261 + js1 = stats_mod.JournalStats() 262 + js1.scan(str(journal), use_cache=True) 263 + assert js1.agent_counts["meetings"] == 1 264 + assert (day / "stats.json").exists() 265 + 266 + # Record cache mtime 267 + import time 268 + 269 + cache_mtime = (day / "stats.json").stat().st_mtime 270 + time.sleep(0.05) 271 + 272 + # Modify the facet event file (add a second event) 273 + event2 = dict(event, start="01:00:00", end="01:10:00", agent="summarize") 274 + with open(events_dir / "20240101.jsonl", "a") as f: 275 + f.write("\n" + json.dumps(event2)) 276 + 277 + # Second scan - cache should be invalidated because facet event mtime > cache mtime 278 + js2 = stats_mod.JournalStats() 279 + js2.scan(str(journal), use_cache=True) 280 + assert (day / "stats.json").stat().st_mtime > cache_mtime 281 + assert js2.agent_counts["meetings"] == 1 282 + assert js2.agent_counts["summarize"] == 1 283 + 284 + 224 285 def test_token_usage_new_format(tmp_path, monkeypatch): 225 286 """Test that the new unified token format is properly handled.""" 226 287 stats_mod = importlib.import_module("think.journal_stats")

+42 -5

think/journal_stats.py

··· 6 6 import logging 7 7 import os 8 8 from collections import Counter 9 - from datetime import datetime 9 + from datetime import datetime, timezone 10 10 from pathlib import Path 11 11 from typing import Dict 12 12 ··· 58 58 files.extend(agents_dir.glob("*.md")) 59 59 files.extend(agents_dir.glob("*/*.json")) 60 60 files.extend(agents_dir.glob("*/*.md")) 61 + 62 + # Check facet event files for this day 63 + journal_root = day_dir.parent 64 + day = day_dir.name 65 + facets_dir = journal_root / "facets" 66 + if facets_dir.is_dir(): 67 + for facet_name in os.listdir(facets_dir): 68 + event_file = facets_dir / facet_name / "events" / f"{day}.jsonl" 69 + if event_file.is_file(): 70 + files.append(event_file) 61 71 62 72 if not files: 63 73 return 0.0 ··· 337 347 "heatmap_data": {"weekday": weekday, "hours": heatmap_hours}, 338 348 } 339 349 340 - def scan_all_tokens(self, journal_path: Path) -> None: 350 + def scan_all_tokens(self, journal_path: Path, use_cache: bool = True) -> None: 341 351 """Scan all token usage files in the tokens directory. 342 352 343 353 Reads daily *.jsonl files (one JSON object per line). ··· 346 356 if not tokens_dir.is_dir(): 347 357 return 348 358 359 + today = datetime.now(timezone.utc).strftime("%Y%m%d") 360 + 349 361 # Scan JSONL files only 350 362 for token_file in tokens_dir.glob("*.jsonl"): 363 + day = token_file.stem 364 + cache_file = token_file.parent / f"{day}.tokens_cache.json" 365 + 366 + if use_cache and day != today and cache_file.exists(): 367 + try: 368 + if cache_file.stat().st_mtime > token_file.stat().st_mtime: 369 + with open(cache_file, encoding="utf-8") as f: 370 + cached = json.load(f) 371 + self.token_usage[day] = cached 372 + for model, counts in cached.items(): 373 + if model not in self.token_totals: 374 + self.token_totals[model] = {} 375 + for token_type, count in counts.items(): 376 + if token_type not in self.token_totals[model]: 377 + self.token_totals[model][token_type] = 0 378 + self.token_totals[model][token_type] += count 379 + continue 380 + except Exception as e: 381 + logger.debug(f"Token cache load failed for {token_file}: {e}") 382 + 351 383 try: 352 384 with open(token_file, "r", encoding="utf-8") as f: 353 385 for line in f: ··· 365 397 logger.warning(f"Error reading token file {token_file}: {e}") 366 398 continue 367 399 400 + if use_cache and day != today: 401 + try: 402 + with open(cache_file, "w", encoding="utf-8") as f: 403 + json.dump(self.token_usage.get(day, {}), f) 404 + except Exception as e: 405 + logger.debug(f"Token cache save failed for {token_file}: {e}") 406 + 368 407 def _process_token_entry(self, data: dict) -> None: 369 408 """Process a single token usage entry (expects normalized format).""" 370 - from datetime import datetime, timezone 371 - 372 409 # Extract date from timestamp 373 410 timestamp = data.get("timestamp") 374 411 if not timestamp: ··· 449 486 self._save_day_cache(day_dir, day_data) 450 487 451 488 # Scan tokens directory once after all days are processed 452 - self.scan_all_tokens(Path(journal)) 489 + self.scan_all_tokens(Path(journal), use_cache=use_cache) 453 490 454 491 if verbose: 455 492 cache_status = (

Configure Feed

Configure Feed