Improve extraction failure diagnostics and token usage logging

+3 -2

muse/anticipation.py

··· 15 15 from think.facets import facet_summaries 16 16 from think.hooks import ( 17 17 compute_output_source, 18 + log_extraction_failure, 18 19 should_skip_extraction, 19 20 write_events_jsonl, 20 21 ) ··· 64 65 json_output=True, 65 66 ) 66 67 except Exception as e: 67 - logging.error("Extraction generation failed: %s", e) 68 + log_extraction_failure(e, name) 68 69 return None 69 70 70 71 try: 71 72 events = json.loads(response_text) 72 73 except json.JSONDecodeError as e: 73 - logging.error("Invalid JSON from extraction: %s", e) 74 + logging.error("Invalid JSON from anticipation extraction: %s", e) 74 75 return None 75 76 76 77 if not isinstance(events, list):

+3 -2

muse/occurrence.py

··· 15 15 from think.facets import facet_summaries 16 16 from think.hooks import ( 17 17 compute_output_source, 18 + log_extraction_failure, 18 19 should_skip_extraction, 19 20 write_events_jsonl, 20 21 ) ··· 69 70 json_output=True, 70 71 ) 71 72 except Exception as e: 72 - logging.error("Extraction generation failed: %s", e) 73 + log_extraction_failure(e, name) 73 74 return None 74 75 75 76 try: 76 77 events = json.loads(response_text) 77 78 except json.JSONDecodeError as e: 78 - logging.error("Invalid JSON from extraction: %s", e) 79 + logging.error("Invalid JSON from occurrence extraction: %s", e) 79 80 return None 80 81 81 82 if not isinstance(events, list):

+45

think/hooks.py

# Relies on the module-level ``import logging`` in think/hooks.py.
def log_extraction_failure(e: Exception, name: str) -> None:
    """Log enhanced diagnostics for extraction generation failures.

    Any exception other than ``IncompleteJSONError`` is logged as a single
    error line.  For ``IncompleteJSONError`` the log entry additionally
    carries a head+tail preview of the partial text and the number of unique
    characters in its tail, and flags output that looks like degenerate
    token repetition.

    Args:
        e: The exception from generate().
        name: Generator name for log context.
    """
    # Kept as a function-scope import, as in the original; presumably this
    # avoids an import cycle with think.models -- confirm before hoisting.
    from think.models import IncompleteJSONError

    if not isinstance(e, IncompleteJSONError):
        logging.error("Extraction generation failed for %s: %s", name, e)
        return

    preview_edge = 200  # chars shown from each end of a long partial
    tail_window = 1000  # how much of the end is inspected for repetition
    unique_threshold = 20  # fewer unique chars than this looks degenerate
    # Heuristic floor: a short tail trivially has few unique characters, so
    # it must not be reported as repetition.
    min_repetition_sample = 200

    # This runs inside the caller's ``except`` block; a missing or None
    # partial_text must not raise a second exception from here.
    partial = getattr(e, "partial_text", None) or ""
    length = len(partial)

    # Log head + tail of partial output
    if length <= 2 * preview_edge:
        preview = partial
    else:
        preview = (
            f"{partial[:preview_edge]}\n...[{length} chars total]...\n"
            f"{partial[-preview_edge:]}"
        )

    # Repetition detection: count unique chars in the tail.  Slicing clamps
    # to the string length, so shorter text is inspected in full.
    tail = partial[-tail_window:]
    unique_count = len(set(tail))
    repetition_flag = ""
    if len(tail) >= min_repetition_sample and unique_count < unique_threshold:
        repetition_flag = (
            " [POSSIBLE DEGENERATE REPETITION: "
            f"{unique_count} unique chars in last {len(tail)}]"
        )

    logging.error(
        "Extraction generation failed for %s: %s "
        "(partial_text: %d chars, %d unique in tail%s)\n%s",
        name,
        e,
        length,
        unique_count,
        repetition_flag,
        preview,
    )

+15 -10

think/models.py

··· 982 982 **kwargs, 983 983 ) 984 984 985 - # Validate JSON output if requested 986 - _validate_json_response(result, json_output) 987 - 988 - # Log token usage centrally 985 + # Log token usage centrally (before validation so truncated responses 986 + # still get their usage recorded) 989 987 if result.get("usage"): 990 988 log_token_usage( 991 989 model=model, ··· 993 991 context=context, 994 992 type="generate", 995 993 ) 994 + 995 + # Validate JSON output if requested 996 + _validate_json_response(result, json_output) 996 997 997 998 return result["text"] 998 999 ··· 1136 1137 **kwargs, 1137 1138 ) 1138 1139 1139 - _validate_json_response(result, json_output) 1140 - 1140 + # Log token usage centrally (before validation so truncated responses 1141 + # still get their usage recorded) 1141 1142 if result.get("usage"): 1142 1143 log_token_usage( 1143 1144 model=model, ··· 1145 1146 context=context, 1146 1147 type="generate", 1147 1148 ) 1149 + 1150 + # Validate JSON output if requested 1151 + _validate_json_response(result, json_output) 1148 1152 1149 1153 return result 1150 1154 ··· 1224 1228 **kwargs, 1225 1229 ) 1226 1230 1227 - # Validate JSON output if requested 1228 - _validate_json_response(result, json_output) 1229 - 1230 - # Log token usage centrally 1231 + # Log token usage centrally (before validation so truncated responses 1232 + # still get their usage recorded) 1231 1233 if result.get("usage"): 1232 1234 log_token_usage( 1233 1235 model=model, ··· 1235 1237 context=context, 1236 1238 type="generate", 1237 1239 ) 1240 + 1241 + # Validate JSON output if requested 1242 + _validate_json_response(result, json_output) 1238 1243 1239 1244 return result["text"] 1240 1245

Configure Feed

Configure Feed