personal memory agent
1# SPDX-License-Identifier: AGPL-3.0-only
2# Copyright (c) 2026 sol pbc
3
4"""Media file metadata detection utilities."""
5
6from __future__ import annotations
7
8import json
9import os
10import subprocess
11import sys
12from datetime import datetime, timezone
13from pathlib import Path
14from typing import Optional
15
16from .prompts import load_prompt
17
# JSON schema loaded once at import time from the file sitting next to this
# module; detect_created() hands it to generate() as ``json_schema``.
_SCHEMA = json.loads(
    Path(__file__)
    .parent.joinpath("detect_created.schema.json")
    .read_text(encoding="utf-8")
)
21
22
def _load_system_prompt() -> str:
    """Return the text of the ``detect_created`` system prompt.

    The prompt is resolved by ``load_prompt`` relative to this module's
    directory (presumably a ``detect_created.txt`` file there -- confirm
    against ``load_prompt``).
    """
    module_dir = Path(__file__).parent
    prompt = load_prompt("detect_created", base_dir=module_dir)
    return prompt.text
26
27
def _extract_metadata(path: str) -> str:
    """Run ``exiftool -all`` on *path* and return its standard output.

    This is best-effort: any exception (exiftool missing, non-zero exit
    status because of ``check=True``, ...) is reported as an
    ``"Error extracting metadata: ..."`` string instead of being raised.
    """
    try:
        completed = subprocess.run(
            ["exiftool", "-all", path],
            capture_output=True,
            text=True,
            check=True,
        )
    except Exception as exc:  # pragma: no cover - exiftool optional
        return f"Error extracting metadata: {exc}"
    return completed.stdout
40
41
def _debug_write_content(content: str, path: str) -> None:
    """Write *content* to a timestamped debug file in the system temp directory.

    The file is named ``gemini_debug_<YYYYmmdd_HHMMSS>_<basename of path>.md``
    and its location is reported on stderr.

    This is a diagnostic aid only, so a failure to write (missing or
    read-only temp directory, ...) is reported on stderr and otherwise
    ignored rather than propagated to the caller.

    Parameters
    ----------
    content : str
        Text to dump.
    path : str
        Path of the file being analysed; only its basename is used, to make
        the debug filename recognisable.
    """
    # Local import: use the platform's temp directory instead of assuming a
    # POSIX "/tmp" exists.
    import tempfile

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"gemini_debug_{timestamp}_{os.path.basename(path)}.md"
    debug_path = os.path.join(tempfile.gettempdir(), filename)

    try:
        with open(debug_path, "w", encoding="utf-8") as f:
            f.write(content)
    except OSError as exc:
        # Debug output must never break the actual detection run.
        print(f"Debug: Could not write {debug_path}: {exc}", file=sys.stderr)
        return

    print(f"Debug: Content written to {debug_path}", file=sys.stderr)
52
53
def _utc_result_to_local(result: dict) -> None:
    """Rewrite ``result["day"]`` / ``result["time"]`` from UTC to local time, in place.

    Does nothing when either field is missing or empty. Raises ``ValueError``
    when the combined value does not match ``%Y%m%d%H%M%S``.
    """
    day = result.get("day")
    time_of_day = result.get("time")
    if not (day and time_of_day):
        return

    utc_dt = datetime.strptime(f"{day}{time_of_day}", "%Y%m%d%H%M%S").replace(
        tzinfo=timezone.utc
    )
    # astimezone() without an argument converts to the system local timezone.
    local_dt = utc_dt.astimezone()

    # NOTE(review): "utc" stays True although day/time are now local --
    # confirm callers expect that.
    result["day"] = local_dt.strftime("%Y%m%d")
    result["time"] = local_dt.strftime("%H%M%S")


def detect_created(
    path: str, original_filename: Optional[str] = None, guidance: Optional[str] = None
) -> Optional[dict]:
    """Return creation time information for *path* using configured provider.

    The exiftool output for *path* is wrapped in a small markdown document
    (plus optional user guidance), sent to ``think.models.generate`` with the
    ``detect_created`` system prompt and JSON schema, and the JSON reply is
    parsed. When the reply has ``"utc": true`` and both ``day`` and ``time``
    are present, they are converted from UTC to the system local timezone.

    Parameters
    ----------
    path : str
        Path to the file to analyze
    original_filename : Optional[str]
        Original filename if path is a temporary file
    guidance : Optional[str]
        Optional guidance text from the user to help the LLM interpret ambiguous metadata

    Returns
    -------
    Optional[dict]
        The parsed response object, or ``None`` when the response is not
        valid JSON, is not a JSON object, or carries a UTC ``day``/``time``
        that cannot be parsed or converted.
    """
    metadata = _extract_metadata(path)

    # Use original filename in header if provided, otherwise use the actual path
    display_path = original_filename or path

    lines = [
        f"# exiftool -all output for {display_path}",
        "",
    ]

    # If we have an original filename and it's different from path, add a note
    if original_filename and original_filename != path:
        lines.extend(
            [
                f"Original filename: {original_filename}",
                f"(Analyzing temporary file: {path})",
                "",
            ]
        )

    lines.append(metadata)
    markdown = "\n".join(lines)
    if guidance:
        markdown += f"\n\nImportant guidance from the user: {guidance}"

    # Debug: write content to temp file
    _debug_write_content(markdown, path)

    # Imported at call time, as in the rest of this function's history, so
    # importing this module does not require think.models.
    from think.models import generate

    response_text = generate(
        contents=markdown,
        context="detect.created",
        temperature=0.3,
        max_output_tokens=256,
        thinking_budget=4096,
        system_instruction=_load_system_prompt(),
        json_output=True,
        json_schema=_SCHEMA,
    )

    try:
        result = json.loads(response_text)
    except (json.JSONDecodeError, TypeError):
        # TypeError: the provider returned something that is not str/bytes
        # (e.g. None); treat it like any other unparseable response.
        return None

    # Anything other than a JSON object cannot be queried with .get() and is
    # not a valid result for callers.
    if not isinstance(result, dict):
        return None

    # Convert UTC to local time if needed
    if result.get("utc") is True:
        try:
            _utc_result_to_local(result)
        except (ValueError, OverflowError, OSError):
            # Malformed or out-of-range day/time from the model: report
            # "could not detect" instead of raising, matching the handling
            # of malformed JSON above.
            return None

    return result
131 return None