···3737 try:
3838 with open(stats_path, "r", encoding="utf-8") as f:
3939 response["stats"] = json.load(f)
4040+ response["file_mtime"] = os.path.getmtime(stats_path)
4041 except Exception:
4142 logger.exception("Failed to read stats data")
4243 response["error"] = "Failed to read stats data"
+8-5
tests/test_journal_stats.py
···169169170170 # Test JSON output includes token usage
171171 data = js.to_dict()
172172- assert "token_usage_by_day" in data
173173- assert "token_totals_by_model" in data
174174- assert "total_transcript_duration" in data
175175- assert "total_percept_duration" in data
172172+ assert data["schema_version"] == 2
173173+ assert "generated_at" in data
174174+ assert data["day_count"] == 2
175175+ assert "tokens" in data
176176+ assert "by_day" in data["tokens"]
177177+ assert "total_transcript_duration" in data["totals"]
178178+ assert "total_percept_duration" in data["totals"]
176179 assert (
177177- data["token_usage_by_day"]["20240101"]["gemini-2.5-flash"]["total_tokens"]
180180+ data["tokens"]["by_day"]["20240101"]["gemini-2.5-flash"]["total_tokens"]
178181 == 495
179182 )
180183
+104
tests/test_stats_schema.py
···11+# SPDX-License-Identifier: AGPL-3.0-only
22+# Copyright (c) 2026 sol pbc
33+44+import importlib
55+66+import pytest
def test_validate_passes_on_valid_output(tmp_path, monkeypatch):
    """Scan a small fixture journal and check that to_dict() output validates."""
    journal_stats = importlib.import_module("think.journal_stats")
    stats_schema = importlib.import_module("think.stats_schema")

    # Create minimal transcript fixture under a single day directory.
    segment_dir = tmp_path / "20240101" / "default" / "123456_300"
    segment_dir.mkdir(parents=True)
    transcript = (
        '{"raw": "raw.flac"}\n'
        '{"start": "10:00:00", "text": "hello"}\n'
    )
    (segment_dir / "audio.jsonl").write_text(transcript)

    journal_root = str(tmp_path)
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", journal_root)
    collector = journal_stats.JournalStats()
    collector.scan(journal_root)

    errors = stats_schema.validate(collector.to_dict())
    assert errors == [], f"Validation errors: {errors}"
def test_validate_rejects_missing_fields():
    """Payloads that are incomplete or mis-versioned must yield error messages."""
    validate = importlib.import_module("think.stats_schema").validate

    # Empty dict
    empty_errors = validate({})
    assert len(empty_errors) > 0
    assert any("schema_version" in message for message in empty_errors)

    # Missing days
    without_days = {"schema_version": 2, "generated_at": "2026-04-10T00:00:00+00:00"}
    assert any("days" in message for message in validate(without_days))

    # Wrong schema version (every required key present, only the version is off)
    wrong_version = {
        "schema_version": 99,
        "generated_at": "x",
        "day_count": 0,
        "days": {},
        "totals": {},
        "heatmap": [],
        "tokens": {},
        "agents": {},
        "facets": {},
    }
    assert any("schema_version" in message for message in validate(wrong_version))
def test_save_json_raises_on_invalid(tmp_path, monkeypatch):
    """save_json() must raise ValueError when validation fails."""
    journal_stats = importlib.import_module("think.journal_stats")
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    collector = journal_stats.JournalStats()

    # Corrupt the schema version so validation fails: shadow to_dict on the
    # instance with a wrapper that overrides only that one key.
    real_to_dict = collector.to_dict

    def corrupted_to_dict():
        payload = dict(real_to_dict())
        payload["schema_version"] = 99
        return payload

    collector.to_dict = corrupted_to_dict

    with pytest.raises(ValueError, match="Stats validation failed"):
        collector.save_json(str(tmp_path))
def test_day_fields_present_in_scan_day(tmp_path, monkeypatch):
    """Verify every key in DAY_FIELDS appears in scan_day output."""
    journal_stats = importlib.import_module("think.journal_stats")
    stats_schema = importlib.import_module("think.stats_schema")

    day_dir = tmp_path / "20240101"
    segment_dir = day_dir / "default" / "123456_300"
    segment_dir.mkdir(parents=True)

    # Create transcript and percept fixtures
    fixtures = {
        "audio.jsonl": (
            '{"raw": "raw.flac"}\n'
            '{"start": "10:00:00", "text": "hello"}\n'
        ),
        "screen.jsonl": (
            '{"header": true}\n'
            '{"frame_id": 1, "timestamp": "10:00:00"}\n'
        ),
    }
    for filename, content in fixtures.items():
        (segment_dir / filename).write_text(content)

    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    collector = journal_stats.JournalStats()
    scanned = collector.scan_day("20240101", str(day_dir))

    stats = scanned["stats"]
    for field in stats_schema.DAY_FIELDS:
        assert field in stats, f"DAY_FIELDS field '{field}' missing from scan_day output"
+33-13
think/journal_stats.py
···1313from observe.sense import scan_day as sense_scan_day
1414from observe.utils import VIDEO_EXTENSIONS, load_analysis_frames
1515from think.agents import scan_day as generate_scan_day
1616+from think.stats_schema import DAY_FIELDS, SCHEMA_VERSION, validate as validate_stats
1617from think.utils import day_dirs, get_journal, setup_cli
17181819logger = logging.getLogger(__name__)
def to_dict(self) -> dict:
    """Return the collected statistics as a schema-versioned dictionary.

    Each day entry is restricted to the names in ``DAY_FIELDS``; a field the
    day's stats do not contain is reported as ``0``. Related values are
    grouped under ``totals``, ``tokens``, ``agents`` and ``facets``.
    """
    per_day = {}
    for day_key, day_stats in self.days.items():
        per_day[day_key] = {name: day_stats.get(name, 0) for name in DAY_FIELDS}

    # The two duration totals live alongside the per-field totals.
    totals = dict(self.totals)
    totals["total_transcript_duration"] = self.total_transcript_duration
    totals["total_percept_duration"] = self.total_percept_duration

    tokens = {
        "by_day": self.token_usage,
        "by_model": self.token_totals,
    }
    agents = {
        "counts": dict(self.agent_counts),
        "minutes": {k: round(v, 2) for k, v in self.agent_minutes.items()},
        "counts_by_day": self.agent_counts_by_day,
    }
    facets = {
        "counts": dict(self.facet_counts),
        "minutes": {k: round(v, 2) for k, v in self.facet_minutes.items()},
        "counts_by_day": self.facet_counts_by_day,
    }

    return {
        "schema_version": SCHEMA_VERSION,
        # Timezone-aware UTC timestamp in ISO 8601 form.
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "day_count": len(self.days),
        "days": per_day,
        "totals": totals,
        "heatmap": self.heatmap,
        "tokens": tokens,
        "agents": agents,
        "facets": facets,
    }
def save_json(self, journal: str) -> None:
    """Write full statistics to ``stats.json`` in ``journal``.

    The payload is validated before anything is written.

    Raises:
        ValueError: if validation reports any error; the message joins all
            reported errors with ``"; "``.
    """
    payload = self.to_dict()
    problems = validate_stats(payload)
    if problems:
        detail = "; ".join(problems)
        raise ValueError(f"Stats validation failed: {detail}")

    target = os.path.join(journal, "stats.json")
    with open(target, "w", encoding="utf-8") as out:
        json.dump(payload, out, indent=2)
527547528548529549def main() -> None:
+76
think/stats_schema.py
···11+# SPDX-License-Identifier: AGPL-3.0-only
22+# Copyright (c) 2026 sol pbc
SCHEMA_VERSION = 2

# Fields every per-day entry under "days" must carry.
DAY_FIELDS = (
    "transcript_sessions",
    "transcript_segments",
    "transcript_duration",
    "percept_sessions",
    "percept_frames",
    "percept_duration",
    "pending_segments",
    "outputs_processed",
    "outputs_pending",
    "day_bytes",
)

# Same names as DAY_FIELDS plus the two aggregate duration fields. Derived
# from DAY_FIELDS so the two tuples cannot drift apart.
TOTAL_FIELDS = DAY_FIELDS + (
    "total_transcript_duration",
    "total_percept_duration",
)

REQUIRED_TOP_LEVEL = (
    "schema_version",
    "generated_at",
    "day_count",
    "days",
    "totals",
    "heatmap",
    "tokens",
    "agents",
    "facets",
)

# Keys that get their own, more specific check in validate(); skipping them in
# the generic presence loop avoids reporting the same missing key twice.
_CHECKED_INDIVIDUALLY = frozenset({"schema_version", "generated_at"})


def validate(data: dict) -> list[str]:
    """Validate stats output against schema v2.

    Args:
        data: Candidate stats payload. Anything that is not a ``dict`` is
            reported as an error rather than raising.

    Returns:
        A list of error strings; an empty list means the payload is valid.
        The function never raises on malformed input.
    """
    if not isinstance(data, dict):
        return [f"stats data must be a dict, got {type(data).__name__}"]

    errors: list[str] = []

    # Check schema_version
    if "schema_version" not in data:
        errors.append("missing 'schema_version'")
    elif data["schema_version"] != SCHEMA_VERSION:
        errors.append(
            f"schema_version is {data['schema_version']}, expected {SCHEMA_VERSION}"
        )

    # Check generated_at
    if "generated_at" not in data:
        errors.append("missing 'generated_at'")
    elif not isinstance(data["generated_at"], str):
        errors.append("'generated_at' must be a string")

    # Check the remaining required top-level keys
    errors.extend(
        f"missing required key '{key}'"
        for key in REQUIRED_TOP_LEVEL
        if key not in _CHECKED_INDIVIDUALLY and key not in data
    )

    # Check the shape of "days" and of every day entry. A missing "days" key
    # was already reported above, so default to an empty mapping here.
    days = data.get("days", {})
    if not isinstance(days, dict):
        errors.append("'days' must be a dict")
        return errors

    for day, entry in days.items():
        if not isinstance(entry, dict):
            # A membership test on a non-container would raise TypeError.
            errors.append(f"day entry '{day}' must be a dict")
            continue
        errors.extend(
            f"day entry missing field '{field}' (day '{day}')"
            for field in DAY_FIELDS
            if field not in entry
        )

    return errors