personal memory agent

refactor(stats): migrate scan_day + dashboard from events to activities

- Bump SCHEMA_VERSION from 3 to 4 so that per-day `{day}/stats.json` caches are invalidated on the next `sol think`, and so legacy events-agent labels (`meetings`/`decisions`/`followups`) do not linger in cached bars.
- Switch the dashboard pipeline to read `facets/*/activities/{day}.jsonl` via `load_activity_records`, `estimate_duration_minutes`, and `segment_parse`. Bar categories now reflect activity types (`meeting`, `coding`, ...); the per-day cache key `agent_data` is retained for cache-format compatibility but is now keyed by activity type (see the example record after this list).
- FOUNDER-REVIEW GATE: Do NOT deploy to /data/solstone or the Mac Mini until the founder has pulled this branch, run `sol think --day <recent>`, and visually confirmed that the rebuilt Activities chart and heatmap render correctly with activity-type labels.
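
For reviewers, a sketch of the record shape the new pipeline consumes. Field names come from the test fixtures in this diff, and the helper calls mirror their usage in `think/journal_stats.py`; everything else is illustrative, not the canonical schema:

```python
# One JSON object per line in facets/<facet>/activities/<YYYYMMDD>.jsonl
activity = {
    "id": "meeting_000000_300",     # as in the test fixtures below
    "activity": "meeting",          # becomes the bar-chart category
    "segments": ["000000_300"],     # segment keys drive duration + heatmap
    "description": "Project sync",
}

# Assumed flow, mirroring scan_day() in think/journal_stats.py:
#   records = load_activity_records("work", "20240101")
#   minutes = estimate_duration_minutes(record["segments"])
#   start, end = segment_parse(seg)  # may yield (None, None) for bad keys
```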

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

+84 -105
+8 -8
apps/stats/static/dashboard.js
···
 const Dashboard = (function() {
   'use strict';

-  const EXPECTED_SCHEMA_VERSION = 3;
+  const EXPECTED_SCHEMA_VERSION = 4;
   const DISPLAY_LABELS = { transcript: 'Audio', percept: 'Screen' };

   // DOM element factory
···
     return palette[index % palette.length];
   }

-  // Build stacked category chart (for Events or Facets)
+  // Build stacked category chart (for Activities or Facets)
   function buildStackedCategoryChart(container, countsByDay, meta = {}) {
     container.innerHTML = '';

···
     }
   );

-  // Render Events stacked bar chart
+  // Render Activities stacked bar chart
   buildStackedCategoryChart(
-    document.getElementById('eventsChart'),
+    document.getElementById('activitiesChart'),
     stats.talents.counts_by_day || {},
-    Object.assign({}, data.generators || {}, {
+    {
       emptyIcon: '⚡',
-      emptyText: 'No event data recorded',
-      ariaLabel: 'Events bar chart showing agent event counts over the last 30 days'
-    }) // Use generator metadata for titles/colors
+      emptyText: 'No activity data recorded',
+      ariaLabel: 'Activities bar chart showing activity counts over the last 30 days'
+    }
   );

   // Render repairs if needed
+2 -2
apps/stats/workspace.html
···
         </div>

         <div class="chart-section">
-          <h2>Events (Last 30 Days)</h2>
-          <div class="chart" id="eventsChart"></div>
+          <h2>Activities (Last 30 Days)</h2>
+          <div class="chart" id="activitiesChart"></div>
         </div>
       </div>
     </div>
+13 -19
tests/test_journal_stats.py
···
     (day / "talents").mkdir()
     (day / "talents" / "flow.md").write_text("")

-    # Create event in new JSONL format: facets/{facet}/events/YYYYMMDD.jsonl
-    events_dir = journal / "facets" / "work" / "events"
-    events_dir.mkdir(parents=True)
-    event = {
-        "type": "meeting",
-        "start": "00:00:00",
-        "end": "00:05:00",
-        "title": "t",
-        "summary": "s",
-        "work": True,
-        "participants": [],
-        "details": "",
-        "facet": "work",
-        "agent": "meetings",
-        "occurred": True,
-        "source": "20240101/talents/meetings.md",
+    facet_dir = journal / "facets" / "work"
+    facet_dir.mkdir(parents=True)
+    (facet_dir / "facet.json").write_text(json.dumps({"title": "Work"}))
+    activities_dir = facet_dir / "activities"
+    activities_dir.mkdir(parents=True)
+    activity = {
+        "id": "meeting_000000_300",
+        "activity": "meeting",
+        "segments": ["000000_300"],
+        "description": "Project sync",
     }
-    (events_dir / "20240101.jsonl").write_text(json.dumps(event))
+    (activities_dir / "20240101.jsonl").write_text(json.dumps(activity) + "\n")

     monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(journal))
     js = stats_mod.JournalStats()
···
     assert (
         js.days["20240101"]["pending_segments"] == 1
     )  # Both files belong to same segment
-    assert js.agent_counts["meetings"] == 1
+    assert js.agent_counts["meeting"] == 1
     assert js.facet_counts["work"] == 1
     assert js.facet_minutes["work"] == 5.0
     assert js.heatmap[0][0] == 5
···

     # Test JSON output includes token usage
     data = js.to_dict()
-    assert data["schema_version"] == 3
+    assert data["schema_version"] == 4
     assert "generated_at" in data
     assert data["day_count"] == 2
     assert "tokens" in data
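
A note on the fixture arithmetic: the segment key `000000_300` appears to encode a 00:00:00 start plus a 300-second duration, which is what makes `facet_minutes["work"] == 5.0` and `heatmap[0][0] == 5` hold. That reading is an assumption inferred from the asserts; the real parsing lives in `think.utils.segment_parse`. A standalone sketch of the assumed key format:

```python
def parse_segment_key(seg: str) -> tuple[str, int]:
    """Hypothetical reading of 'HHMMSS_<seconds>' segment keys."""
    start, dur = seg.split("_")
    return f"{start[0:2]}:{start[2:4]}:{start[4:6]}", int(dur)

start, seconds = parse_segment_key("000000_300")
assert start == "00:00:00"
assert seconds / 60 == 5.0  # matches facet_minutes["work"] in the test
```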
+11 -16
tests/test_stats_contract.py
···
     (seg2 / "audio.flac").write_bytes(b"fLaC")
     (day / "talents" / "schedule.json").write_text("[]")

-    events_dir = journal / "facets" / "work" / "events"
-    events_dir.mkdir(parents=True)
-    event = {
-        "type": "meeting",
-        "start": "09:00:00",
-        "end": "09:30:00",
-        "title": "standup",
-        "summary": "daily sync",
-        "work": True,
-        "participants": [],
-        "details": "",
-        "facet": "work",
-        "agent": "meetings",
-        "occurred": True,
-        "source": "20240101/talents/meetings.md",
+    facet_dir = journal / "facets" / "work"
+    facet_dir.mkdir(parents=True)
+    (facet_dir / "facet.json").write_text(json.dumps({"title": "Work"}))
+    activities_dir = facet_dir / "activities"
+    activities_dir.mkdir(parents=True)
+    activity = {
+        "id": "meeting_090000_300",
+        "activity": "meeting",
+        "segments": ["090000_300"],
+        "description": "daily sync",
     }
-    (events_dir / "20240101.jsonl").write_text(json.dumps(event) + "\n")
+    (activities_dir / "20240101.jsonl").write_text(json.dumps(activity) + "\n")

     tokens_dir = journal / "tokens"
     tokens_dir.mkdir()
+49 -59
think/journal_stats.py
···

 from observe.sense import scan_day as sense_scan_day
 from observe.utils import VIDEO_EXTENSIONS, load_analysis_frames
+from think.activities import estimate_duration_minutes, load_activity_records
+from think.facets import get_facets
 from think.stats_schema import DAY_FIELDS, SCHEMA_VERSION
 from think.stats_schema import validate as validate_stats
 from think.talents import scan_day as generate_scan_day
-from think.utils import day_dirs, get_journal, setup_cli
+from think.utils import day_dirs, get_journal, segment_parse, setup_cli

 logger = logging.getLogger(__name__)
···
     stats["outputs_processed"] = len(output_info["processed"])
     stats["outputs_pending"] = len(output_info["repairable"])

-    # --- Events and heatmap from facets/*/events/YYYYMMDD.jsonl ---
+    # --- Activities and heatmap from facets/*/activities/YYYYMMDD.jsonl ---
     weekday = datetime.strptime(day, "%Y%m%d").weekday()
-    journal_root = Path(get_journal())
-    facets_dir = journal_root / "facets"
-
-    if facets_dir.is_dir():
-        for facet_name in os.listdir(facets_dir):
-            events_dir = facets_dir / facet_name / "events"
-            if not events_dir.is_dir():
-                continue
-            events_file = events_dir / f"{day}.jsonl"
-            if not events_file.exists():
-                continue
-
-            try:
-                with open(events_file, "r", encoding="utf-8") as f:
-                    for line in f:
-                        line = line.strip()
-                        if not line:
-                            continue
-                        try:
-                            event = json.loads(line)
-                        except json.JSONDecodeError:
-                            continue
+    for facet_name, _facet_meta in get_facets().items():
+        activities_file = (
+            Path(get_journal())
+            / "facets"
+            / facet_name
+            / "activities"
+            / f"{day}.jsonl"
+        )
+        try:
+            records = load_activity_records(facet_name, day)
+            for record in records:
+                activity_type = record.get("activity") or "unknown"
+                segments = record.get("segments") or []
+                if not segments:
+                    continue

-                        agent = event.get("agent", "unknown")
-                        if agent not in agent_data:
-                            agent_data[agent] = {"count": 0, "minutes": 0.0}
-                        agent_data[agent]["count"] += 1
+                if activity_type not in agent_data:
+                    agent_data[activity_type] = {"count": 0, "minutes": 0.0}
+                agent_data[activity_type]["count"] += 1

-                        start = event.get("start")
-                        end = event.get("end")
-                        try:
-                            sh, sm, ss = map(int, start.split(":"))
-                            eh, em, es = map(int, end.split(":"))
-                        except (ValueError, AttributeError, TypeError):
-                            continue
+                duration_minutes = float(estimate_duration_minutes(segments))
+                agent_data[activity_type]["minutes"] += duration_minutes

-                        start_sec = sh * 3600 + sm * 60 + ss
-                        end_sec = eh * 3600 + em * 60 + es
-                        duration = max(0, end_sec - start_sec)
-                        agent_data[agent]["minutes"] += duration / 60
+                if facet_name not in facet_data:
+                    facet_data[facet_name] = {"count": 0, "minutes": 0.0}
+                facet_data[facet_name]["count"] += 1
+                facet_data[facet_name]["minutes"] += duration_minutes

-                        # Track facet stats
-                        facet = event.get("facet", facet_name)
-                        if facet not in facet_data:
-                            facet_data[facet] = {"count": 0, "minutes": 0.0}
-                        facet_data[facet]["count"] += 1
-                        facet_data[facet]["minutes"] += duration / 60
+                # Build heatmap hours for this day
+                for seg in segments:
+                    start, end = segment_parse(seg)
+                    if start is None or end is None:
+                        continue

-                        # Build heatmap hours for this day
-                        cur = start_sec
-                        while cur < end_sec:
-                            hour = cur // 3600
-                            if hour >= 24:
-                                break
-                            next_tick = min((hour + 1) * 3600, end_sec)
-                            minutes = (next_tick - cur) / 60
-                            heatmap_hours[str(hour)] = (
-                                heatmap_hours.get(str(hour), 0.0) + minutes
-                            )
-                            cur = next_tick
-            except (OSError, IOError) as e:
-                logger.warning(f"Error reading {events_file}: {e}")
+                    start_sec = start.hour * 3600 + start.minute * 60 + start.second
+                    end_sec = end.hour * 3600 + end.minute * 60 + end.second
+                    cur = start_sec
+                    while cur < end_sec:
+                        hour = cur // 3600
+                        if hour >= 24:
+                            break
+                        next_tick = min((hour + 1) * 3600, end_sec)
+                        minutes = (next_tick - cur) / 60
+                        heatmap_hours[str(hour)] = (
+                            heatmap_hours.get(str(hour), 0.0) + minutes
+                        )
+                        cur = next_tick
+        except (OSError, IOError) as e:
+            logger.warning(f"Error reading {activities_file}: {e}")

     # --- Disk usage ---
     stats["day_bytes"] = sum(
···

     return {
         "stats": dict(stats),
+        # NOTE: agent_data keys are now activity types (e.g., "meeting",
+        # "coding"), not extractor agent names. Key name retained for
+        # cache-format compatibility.
         "agent_data": agent_data,
         "facet_data": facet_data,
         "heatmap_data": {"weekday": weekday, "hours": heatmap_hours},
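
The replacement heatmap logic keeps the old hour-splitting arithmetic but feeds it per-segment bounds from `segment_parse` instead of event `start`/`end` strings. A self-contained sketch of that arithmetic (pure Python, no project imports), showing a segment that crosses an hour boundary:

```python
def bucket_minutes(start_sec: int, end_sec: int) -> dict[str, float]:
    """Split a [start_sec, end_sec) span at hour boundaries and
    accumulate minutes per hour, as in the scan_day() loop above."""
    hours: dict[str, float] = {}
    cur = start_sec
    while cur < end_sec:
        hour = cur // 3600
        if hour >= 24:
            break
        next_tick = min((hour + 1) * 3600, end_sec)
        hours[str(hour)] = hours.get(str(hour), 0.0) + (next_tick - cur) / 60
        cur = next_tick
    return hours

# A 09:50-10:10 segment contributes 10 minutes to hour 9 and 10 to hour 10.
assert bucket_minutes(9 * 3600 + 50 * 60, 10 * 3600 + 10 * 60) == {"9": 10.0, "10": 10.0}
```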
+1 -1
think/stats_schema.py
···
 # SPDX-License-Identifier: AGPL-3.0-only
 # Copyright (c) 2026 sol pbc

-SCHEMA_VERSION = 3
+SCHEMA_VERSION = 4

 DAY_FIELDS = (
     "transcript_sessions",
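
How the bump invalidates caches, per the commit message: a version-3 `{day}/stats.json` no longer matches `SCHEMA_VERSION` and gets rebuilt on the next `sol think`. A minimal sketch of that gate; the helper name and exact check are hypothetical, and only the `schema_version` field and cache path come from this diff and the commit message:

```python
import json
from pathlib import Path

from think.stats_schema import SCHEMA_VERSION  # now 4

def cached_day_stats(day_dir: Path) -> dict | None:
    """Hypothetical cache read: treat any stats.json written under an
    older SCHEMA_VERSION (e.g., the events-era version 3) as stale."""
    cache = day_dir / "stats.json"
    if not cache.exists():
        return None
    try:
        data = json.loads(cache.read_text())
    except json.JSONDecodeError:
        return None
    if data.get("schema_version") != SCHEMA_VERSION:
        return None  # stale cache; rebuilt on the next `sol think`
    return data
```

The dashboard applies the same gate client-side via `EXPECTED_SCHEMA_VERSION = 4` in `dashboard.js`.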