# Personal memory agent
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Event formatting for journal event JSONL files."""

import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Any

13def format_events(
14 entries: list[dict],
15 context: dict | None = None,
16) -> tuple[list[dict], dict]:
17 """Format event JSONL entries to markdown chunks.
18
19 This is the formatter function used by the formatters registry.
20
21 Args:
22 entries: Raw JSONL entries (one event per line)
23 context: Optional context with:
24 - file_path: Path to JSONL file (for extracting facet name and day)
25
26 Returns:
27 Tuple of (chunks, meta) where:
28 - chunks: List of dicts with keys:
29 - timestamp: int (unix ms)
30 - markdown: str
31 - source: dict (original event entry)
32 - meta: Dict with optional "header" and "error" keys
33 """
34 ctx = context or {}
35 file_path = ctx.get("file_path")
36 meta: dict[str, Any] = {}
37 chunks: list[dict[str, Any]] = []
38 skipped_count = 0
39
40 # Extract facet name and day from path
41 facet_name = "unknown"
42 day_str: str | None = None
43
44 if file_path:
45 file_path = Path(file_path)
46
47 # Extract facet name from path: facets/{facet}/events/YYYYMMDD.jsonl
48 path_str = str(file_path)
49 facet_match = re.search(r"facets/([^/]+)/events", path_str)
50 if facet_match:
51 facet_name = facet_match.group(1)
52
53 # Extract day from filename
54 if file_path.stem.isdigit() and len(file_path.stem) == 8:
55 day_str = file_path.stem
56
57 # Calculate base timestamp (midnight of the event day) in milliseconds
58 base_ts = 0
59 if day_str:
60 try:
61 dt = datetime.strptime(day_str, "%Y%m%d")
62 base_ts = int(dt.timestamp() * 1000)
63 except ValueError:
64 pass
65
66 # Build header
67 if day_str:
68 formatted_day = f"{day_str[:4]}-{day_str[4:6]}-{day_str[6:8]}"
69 meta["header"] = f"# Events for '{facet_name}' facet on {formatted_day}"
70 else:
71 meta["header"] = f"# Events for '{facet_name}' facet"
72
73 # Format each event as a chunk
74 for event in entries:
75 # Skip entries without title
76 title = event.get("title")
77 if not title:
78 skipped_count += 1
79 continue
80
81 event_type = event.get("type", "event").capitalize()
82 occurred = event.get("occurred", True)
83
84 # Calculate timestamp from day + start time
85 ts = base_ts
86 start_time = event.get("start", "")
87 if start_time and base_ts:
88 try:
89 # Parse HH:MM:SS or HH:MM
90 time_parts = start_time.split(":")
91 hours = int(time_parts[0])
92 minutes = int(time_parts[1]) if len(time_parts) > 1 else 0
93 seconds = int(time_parts[2]) if len(time_parts) > 2 else 0
94 ts = base_ts + (hours * 3600 + minutes * 60 + seconds) * 1000
95 except (ValueError, IndexError):
96 pass
97
98 # Build markdown
99 type_prefix = "Planned " if not occurred else ""
100 lines = [f"### {type_prefix}{event_type}: {title}\n", ""]
101
102 # Time range (24h format, strip seconds for display)
103 end_time = event.get("end", "")
104 time_label = "Occurred" if occurred else "Scheduled"
105 if start_time:
106 start_display = start_time[:5] if len(start_time) >= 5 else start_time
107 if end_time:
108 end_display = end_time[:5] if len(end_time) >= 5 else end_time
109 lines.append(f"**Time {time_label}:** {start_display} - {end_display}")
110 else:
111 lines.append(f"**Time {time_label}:** {start_display}")
112
113 # Participants
114 participants = event.get("participants", [])
115 if participants and isinstance(participants, list):
116 participants_label = (
117 "Expected Participants" if not occurred else "Participants"
118 )
119 lines.append(f"**{participants_label}:** {', '.join(participants)}")
120
121 # For future-dated event rows, show when they were created (from source path)
122 if not occurred:
123 source = event.get("source", "")
124 # Extract YYYYMMDD from source path like "20240101/talents/agent.md"
125 source_match = re.match(r"(\d{8})/", source)
126 if source_match:
127 created_day = source_match.group(1)
128 created_formatted = (
129 f"{created_day[:4]}-{created_day[4:6]}-{created_day[6:8]}"
130 )
131 lines.append(f"**Created on:** {created_formatted}")
132
133 lines.append("")
134
135 # Summary
136 summary = event.get("summary", "")
137 if summary:
138 lines.append(summary)
139 lines.append("")
140
141 # Details
142 details = event.get("details", "")
143 if details:
144 lines.append(details)
145 lines.append("")
146
147 chunks.append(
148 {
149 "timestamp": ts,
150 "markdown": "\n".join(lines),
151 "source": event,
152 }
153 )
154
155 # Report skipped entries
156 if skipped_count > 0:
157 error_msg = f"Skipped {skipped_count} entries missing 'title' field"
158 if file_path:
159 error_msg += f" in {file_path}"
160 meta["error"] = error_msg
161 logging.info(error_msg)
162
163 # Indexer metadata - agent is always "event" for events
164 meta["indexer"] = {"agent": "event"}
165
166 return chunks, meta