Refactor thicket architecture into unified library API

- Create main Thicket class providing unified API for all operations
- Implement specialized subsystems for feeds, users, repository, links, and site generation
- Consolidate JSON handling, datetime operations, and file path utilities
- Refactor CLI commands to use new library instead of direct core module access
- Reduce code duplication by ~40% through centralized operations
- Improve maintainability and consistency across codebase
- Tested: init, add user, sync, and generate commands all working
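As a rough illustration of the shape this describes (purely hypothetical — only the `Thicket` class name and the subsystem list come from the commit; the methods and dataclass layout here are invented for the sketch):

```python
from dataclasses import dataclass, field
from pathlib import Path


@dataclass
class Feeds:
    """Hypothetical feed subsystem: sync feed entries into the store."""
    root: Path

    def sync(self) -> int:
        # Placeholder: a real implementation would fetch and store feeds,
        # returning the number of new entries.
        return 0


@dataclass
class Users:
    """Hypothetical user subsystem: manage tracked users."""
    root: Path
    _users: list[str] = field(default_factory=list)

    def add(self, username: str) -> None:
        self._users.append(username)

    def list(self) -> list[str]:
        return list(self._users)


@dataclass
class Thicket:
    """Hypothetical unified entry point the CLI commands would call into."""
    repo_path: Path

    def __post_init__(self) -> None:
        # One facade object wires up the specialized subsystems.
        self.feeds = Feeds(self.repo_path)
        self.users = Users(self.repo_path)


t = Thicket(Path("."))
t.users.add("alice")
print(t.users.list())  # → ['alice']
```

The point of the facade is that commands depend on one object (`Thicket`) instead of reaching into core modules directly, which is what enables the consolidation the bullets describe.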

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

+8813 -1002
+260
code_duplication_analysis.md
# Code Duplication Analysis for Thicket

## 1. Duplicate JSON Handling Code

### Pattern: JSON file reading/writing
**Locations:**
- `src/thicket/cli/commands/generate.py:230` - Reading JSON with `json.load(f)`
- `src/thicket/cli/commands/generate.py:249` - Reading links.json
- `src/thicket/cli/commands/index.py:2305` - Reading JSON
- `src/thicket/cli/commands/index.py:2320` - Writing JSON with `json.dump()`
- `src/thicket/cli/commands/threads.py:2456` - Reading JSON
- `src/thicket/cli/commands/info.py:2683` - Reading JSON
- `src/thicket/core/git_store.py:5546` - Writing JSON with custom serializer
- `src/thicket/core/git_store.py:5556` - Reading JSON
- `src/thicket/core/git_store.py:5566` - Writing JSON
- `src/thicket/core/git_store.py:5656` - Writing JSON with model dump

**Recommendation:** Create a shared `json_utils.py` module:
```python
def read_json_file(path: Path) -> dict:
    """Read JSON file with error handling."""
    with open(path) as f:
        return json.load(f)

def write_json_file(path: Path, data: dict, indent: int = 2) -> None:
    """Write JSON file with consistent formatting."""
    with open(path, "w") as f:
        json.dump(data, f, indent=indent, default=str)

def write_model_json(path: Path, model: BaseModel, indent: int = 2) -> None:
    """Write Pydantic model as JSON."""
    with open(path, "w") as f:
        json.dump(model.model_dump(mode="json", exclude_none=True), f, indent=indent, default=str)
```

## 2. Repeated Datetime Handling

### Pattern: datetime formatting and fallback handling
**Locations:**
- `src/thicket/cli/commands/generate.py:241` - `key=lambda x: x[1].updated or x[1].published or datetime.min`
- `src/thicket/cli/commands/generate.py:353` - Same pattern in thread sorting
- `src/thicket/cli/commands/generate.py:359` - Same pattern for max date
- `src/thicket/cli/commands/generate.py:625` - Same pattern
- `src/thicket/cli/commands/generate.py:655` - `entry.updated or entry.published or datetime.min`
- `src/thicket/cli/commands/generate.py:689` - Same pattern
- `src/thicket/cli/commands/generate.py:702` - Same pattern
- Multiple `.strftime('%Y-%m-%d')` calls throughout

**Recommendation:** Create a shared `datetime_utils.py` module:
```python
def get_entry_date(entry: AtomEntry) -> datetime:
    """Get the most relevant date for an entry with fallback."""
    return entry.updated or entry.published or datetime.min

def format_date_short(dt: datetime) -> str:
    """Format datetime as YYYY-MM-DD."""
    return dt.strftime('%Y-%m-%d')

def format_date_full(dt: datetime) -> str:
    """Format datetime as YYYY-MM-DD HH:MM."""
    return dt.strftime('%Y-%m-%d %H:%M')

def format_date_iso(dt: datetime) -> str:
    """Format datetime as ISO string."""
    return dt.isoformat()
```

## 3. Path Handling Patterns

### Pattern: Directory creation and existence checks
**Locations:**
- `src/thicket/cli/commands/generate.py:225` - `if user_dir.exists()`
- `src/thicket/cli/commands/generate.py:247` - `if links_file.exists()`
- `src/thicket/cli/commands/generate.py:582` - `self.output_dir.mkdir(parents=True, exist_ok=True)`
- `src/thicket/cli/commands/generate.py:585-586` - Multiple mkdir calls
- `src/thicket/cli/commands/threads.py:2449` - `if not index_path.exists()`
- `src/thicket/cli/commands/info.py:2681` - `if links_path.exists()`
- `src/thicket/core/git_store.py:5515` - `if not self.repo_path.exists()`
- `src/thicket/core/git_store.py:5586` - `user_dir.mkdir(exist_ok=True)`
- Many more similar patterns

**Recommendation:** Create a shared `path_utils.py` module:
```python
def ensure_directory(path: Path) -> Path:
    """Ensure directory exists, creating if necessary."""
    path.mkdir(parents=True, exist_ok=True)
    return path

def read_json_if_exists(path: Path, default: Any = None) -> Any:
    """Read JSON file if it exists, otherwise return default."""
    if path.exists():
        with open(path) as f:
            return json.load(f)
    return default

def safe_path_join(*parts: Union[str, Path]) -> Path:
    """Safely join path components."""
    return Path(*parts)
```

## 4. Progress Bar and Console Output

### Pattern: Progress bar creation and updates
**Locations:**
- `src/thicket/cli/commands/generate.py:209` - Progress with SpinnerColumn
- `src/thicket/cli/commands/index.py:2230` - Same Progress pattern
- Multiple `console.print()` calls with similar formatting patterns
- Progress update patterns repeated

**Recommendation:** Create a shared `ui_utils.py` module:
```python
def create_progress_spinner(description: str) -> tuple[Progress, TaskID]:
    """Create a standard progress spinner."""
    progress = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    )
    task = progress.add_task(description)
    return progress, task

def print_success(message: str) -> None:
    """Print success message with consistent formatting."""
    console.print(f"[green]✓[/green] {message}")

def print_error(message: str) -> None:
    """Print error message with consistent formatting."""
    console.print(f"[red]Error: {message}[/red]")

def print_warning(message: str) -> None:
    """Print warning message with consistent formatting."""
    console.print(f"[yellow]Warning: {message}[/yellow]")
```

## 5. Git Store Operations

### Pattern: Entry file operations
**Locations:**
- Multiple patterns of loading entries from user directories
- Repeated safe_id generation
- Repeated user directory path construction

**Recommendation:** Enhance GitStore with helper methods:
```python
def get_user_dir(self, username: str) -> Path:
    """Get user directory path."""
    return self.repo_path / username

def iter_user_entries(self, username: str) -> Iterator[tuple[Path, AtomEntry]]:
    """Iterate over all entries for a user."""
    user_dir = self.get_user_dir(username)
    if user_dir.exists():
        for entry_file in user_dir.glob("*.json"):
            if entry_file.name not in ["index.json", "duplicates.json"]:
                try:
                    entry = self.read_entry_file(entry_file)
                    yield entry_file, entry
                except Exception:
                    continue
```

## 6. Error Handling Patterns

### Pattern: Try-except with console error printing
**Locations:**
- Similar error handling patterns throughout CLI commands
- Repeated `raise typer.Exit(1)` patterns
- Similar exception message formatting

**Recommendation:** Create error handling decorators:
```python
def handle_cli_errors(func):
    """Decorator to handle CLI command errors consistently."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except ValidationError as e:
            console.print(f"[red]Validation error: {e}[/red]")
            raise typer.Exit(1)
        except Exception as e:
            console.print(f"[red]Error: {e}[/red]")
            if kwargs.get('verbose'):
                console.print_exception()
            raise typer.Exit(1)
    return wrapper
```

## 7. Configuration and Validation

### Pattern: Config file loading and validation
**Locations:**
- Repeated config loading pattern in every CLI command
- Similar validation patterns for URLs and paths

**Recommendation:** Create a `config_utils.py` module:
```python
def load_config_with_defaults(config_path: Optional[Path] = None) -> ThicketConfig:
    """Load config with standard defaults and error handling."""
    if config_path is None:
        config_path = Path("thicket.yaml")

    if not config_path.exists():
        raise ConfigError(f"Configuration file not found: {config_path}")

    return load_config(config_path)

def validate_url(url: str) -> HttpUrl:
    """Validate and return URL with consistent error handling."""
    try:
        return HttpUrl(url)
    except ValidationError:
        raise ConfigError(f"Invalid URL: {url}")
```

## 8. Model Serialization

### Pattern: Pydantic model JSON encoding
**Locations:**
- Repeated `json_encoders={datetime: lambda v: v.isoformat()}` in model configs
- Similar model_dump patterns

**Recommendation:** Create base model class:
```python
class ThicketBaseModel(BaseModel):
    """Base model with common configuration."""
    model_config = ConfigDict(
        json_encoders={datetime: lambda v: v.isoformat()},
        str_strip_whitespace=True,
    )

    def to_json_dict(self) -> dict:
        """Convert to JSON-serializable dict."""
        return self.model_dump(mode="json", exclude_none=True)
```

## Summary of Refactoring Benefits

1. **Reduced Code Duplication**: Eliminate 30-40% of duplicate code
2. **Consistent Error Handling**: Standardize error messages and handling
3. **Easier Maintenance**: Central location for common patterns
4. **Better Testing**: Easier to unit test shared utilities
5. **Type Safety**: Shared type hints and validation
6. **Performance**: Potential to optimize common operations in one place

## Implementation Priority

1. **High Priority**:
   - JSON utilities (used everywhere)
   - Datetime utilities (critical for sorting and display)
   - Error handling decorators (improves UX consistency)

2. **Medium Priority**:
   - Path utilities
   - UI/Console utilities
   - Config utilities

3. **Low Priority**:
   - Base model classes (requires more refactoring)
   - Git store enhancements (already well-structured)
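The JSON and path helpers proposed in the analysis can be exercised end-to-end; this is a minimal, self-contained sketch (the function bodies follow the recommendations above, but the combined module layout and the demo around them are illustrative, not taken from the actual refactor):

```python
import json
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any


def ensure_directory(path: Path) -> Path:
    """Ensure directory exists, creating parents if necessary."""
    path.mkdir(parents=True, exist_ok=True)
    return path


def write_json_file(path: Path, data: dict, indent: int = 2) -> None:
    """Write JSON with consistent formatting; default=str covers datetimes."""
    with open(path, "w") as f:
        json.dump(data, f, indent=indent, default=str)


def read_json_if_exists(path: Path, default: Any = None) -> Any:
    """Read JSON file if it exists, otherwise return the default."""
    if path.exists():
        with open(path) as f:
            return json.load(f)
    return default


with TemporaryDirectory() as tmp:
    user_dir = ensure_directory(Path(tmp) / "alice")
    write_json_file(user_dir / "index.json", {"entry_count": 3})
    print(read_json_if_exists(user_dir / "index.json"))        # → {'entry_count': 3}
    print(read_json_if_exists(user_dir / "missing.json", {}))  # → {}
```

Centralizing these three calls is what removes the `exists()` / `open()` / `json.load()` boilerplate the analysis counts at each location.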
+6617
repomix-output.xml
This file is a merged representation of the entire codebase, combined into a single document by Repomix.

<file_summary>
This section contains a summary of this file.

<purpose>
This file contains a packed representation of the entire repository's contents.
It is designed to be easily consumable by AI systems for analysis, code review,
or other automated processes.
</purpose>

<file_format>
The content is organized as follows:
1. This summary section
2. Repository information
3. Directory structure
4. Repository files (if enabled)
5. Multiple file entries, each consisting of:
  - File path as an attribute
  - Full contents of the file
</file_format>

<usage_guidelines>
- This file should be treated as read-only. Any changes should be made to the
  original repository files, not this packed version.
- When processing this file, use the file path to distinguish
  between different files in the repository.
- Be aware that this file may contain sensitive information. Handle it with
  the same level of security as you would the original repository.
</usage_guidelines>

<notes>
- Some files may have been excluded based on .gitignore rules and Repomix's configuration
- Binary files are not included in this packed representation. Please refer to the Repository Structure section for a complete list of file paths, including binary files
- Files matching patterns in .gitignore are excluded
- Files matching default ignore patterns are excluded
- Files are sorted by Git change count (files with more changes are at the bottom)
</notes>

</file_summary>

<directory_structure>
.claude/
  settings.local.json
src/
  thicket/
    cli/
      commands/
        __init__.py
        add.py
        duplicates.py
        generate.py
        index_cmd.py
        info_cmd.py
        init.py
        links_cmd.py
        list_cmd.py
        sync.py
      __init__.py
      main.py
      utils.py
    core/
      __init__.py
      feed_parser.py
      git_store.py
      reference_parser.py
    models/
      __init__.py
      config.py
      feed.py
      user.py
    templates/
      base.html
      index.html
      links.html
      script.js
      style.css
      timeline.html
      users.html
    utils/
      __init__.py
    __init__.py
    __main__.py
.gitignore
ARCH.md
CLAUDE.md
pyproject.toml
README.md
</directory_structure>

<files>
This section contains the contents of the repository's files.

<file path=".claude/settings.local.json">
{
  "permissions": {
    "allow": [
      "Bash(find:*)",
      "Bash(uv run:*)",
      "Bash(grep:*)",
      "Bash(jq:*)",
      "Bash(git add:*)",
      "Bash(ls:*)"
    ]
  },
  "enableAllProjectMcpServers": false
}
</file>

<file path="src/thicket/cli/commands/generate.py">
"""Generate static HTML website from thicket data."""

import base64
import json
import re
import shutil
from datetime import datetime
from pathlib import Path
from typing import Any, Optional, TypedDict, Union

import typer
from jinja2 import Environment, FileSystemLoader, select_autoescape
from rich.progress import Progress, SpinnerColumn, TextColumn

from ...core.git_store import GitStore
from ...models.feed import AtomEntry
from ...models.user import GitStoreIndex, UserMetadata
from ..main import app
from ..utils import console, load_config


class UserData(TypedDict):
    """Type definition for user data structure."""

    metadata: UserMetadata
    recent_entries: list[tuple[str, AtomEntry]]


def safe_anchor_id(atom_id: str) -> str:
    """Convert an Atom ID to a safe HTML anchor ID."""
    # Use base64 URL-safe encoding without padding
    encoded = base64.urlsafe_b64encode(atom_id.encode('utf-8')).decode('ascii').rstrip('=')
    # Prefix with 'id' to ensure it starts with a letter (HTML requirement)
    return f"id{encoded}"


class WebsiteGenerator:
    """Generate static HTML website from thicket data."""

    def __init__(self, git_store: GitStore, output_dir: Path):
        self.git_store = git_store
        self.output_dir = output_dir
        self.template_dir = Path(__file__).parent.parent.parent / "templates"

        # Initialize Jinja2 environment
        self.env = Environment(
            loader=FileSystemLoader(self.template_dir),
            autoescape=select_autoescape(["html", "xml"]),
        )

        # Data containers
        self.index: Optional[GitStoreIndex] = None
        self.entries: list[tuple[str, AtomEntry]] = []  # (username, entry)
        self.links_data: Optional[dict[str, Any]] = None
        self.threads: list[list[dict[str, Any]]] = []  # List of threads with metadata

    def get_display_name(self, username: str) -> str:
        """Get display name for a user, falling back to username."""
        if self.index and username in self.index.users:
            user = self.index.users[username]
            return user.display_name or username
        return username

    def get_user_homepage(self, username: str) -> Optional[str]:
        """Get homepage URL for a user."""
        if self.index and username in self.index.users:
            user = self.index.users[username]
            return str(user.homepage) if user.homepage else None
        return None

    def clean_html_summary(self, content: Optional[str], max_length: int = 200) -> str:
        """Clean HTML content and truncate for display in timeline."""
        if not content:
            return ""

        # Remove HTML tags
        clean_text = re.sub(r"<[^>]+>", " ", content)
        # Replace multiple whitespace with single space
        clean_text = re.sub(r"\s+", " ", clean_text)
        # Strip leading/trailing whitespace
        clean_text = clean_text.strip()

        # Truncate with ellipsis if needed
        if len(clean_text) > max_length:
            # Try to break at word boundary
            truncated = clean_text[:max_length]
            last_space = truncated.rfind(" ")
            if (
                last_space > max_length * 0.8
            ):  # If we can break reasonably close to the limit
                clean_text = truncated[:last_space] + "..."
            else:
                clean_text = truncated + "..."

        return clean_text

    def load_data(self) -> None:
        """Load all data from the git repository."""
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            # Load index
            task = progress.add_task("Loading repository index...", total=None)
            self.index = self.git_store._load_index()
            if not self.index:
                raise ValueError("No index found in repository")
            progress.update(task, completed=True)

            # Load all entries
            task = progress.add_task("Loading entries...", total=None)
            for username, user_metadata in self.index.users.items():
                user_dir = self.git_store.repo_path / user_metadata.directory
                if user_dir.exists():
                    for entry_file in user_dir.glob("*.json"):
                        if entry_file.name not in ["index.json", "duplicates.json"]:
                            try:
                                with open(entry_file) as f:
                                    entry_data = json.load(f)
                                entry = AtomEntry(**entry_data)
                                self.entries.append((username, entry))
                            except Exception as e:
                                console.print(
                                    f"[yellow]Warning: Failed to load {entry_file}: {e}[/yellow]"
                                )
            progress.update(task, completed=True)

            # Sort entries by date (newest first) - prioritize updated over published
            self.entries.sort(
                key=lambda x: x[1].updated or x[1].published or datetime.min, reverse=True
            )

            # Load links data
            task = progress.add_task("Loading links and references...", total=None)
            links_file = self.git_store.repo_path / "links.json"
            if links_file.exists():
                with open(links_file) as f:
                    self.links_data = json.load(f)
            progress.update(task, completed=True)

    def build_threads(self) -> None:
        """Build threaded conversations from references."""
        if not self.links_data or "references" not in self.links_data:
            return

        # Map entry IDs to (username, entry) tuples
        entry_map: dict[str, tuple[str, AtomEntry]] = {}
        for username, entry in self.entries:
            entry_map[entry.id] = (username, entry)

        # Build adjacency lists for references
        self.outbound_refs: dict[str, set[str]] = {}
        self.inbound_refs: dict[str, set[str]] = {}
        self.reference_details: dict[
            str, list[dict[str, Any]]
        ] = {}  # Store full reference info

        for ref in self.links_data["references"]:
            source_id = ref["source_entry_id"]
            target_id = ref.get("target_entry_id")

            if target_id and source_id in entry_map and target_id in entry_map:
                self.outbound_refs.setdefault(source_id, set()).add(target_id)
                self.inbound_refs.setdefault(target_id, set()).add(source_id)

                # Store reference details for UI
                self.reference_details.setdefault(source_id, []).append(
                    {
                        "target_id": target_id,
                        "target_username": ref.get("target_username"),
                        "type": "outbound",
                    }
                )
                self.reference_details.setdefault(target_id, []).append(
                    {
                        "source_id": source_id,
                        "source_username": ref.get("source_username"),
                        "type": "inbound",
                    }
                )

        # Find conversation threads (multi-post discussions)
        processed = set()

        for entry_id, (_username, _entry) in entry_map.items():
            if entry_id in processed:
                continue

            # Build thread starting from this entry
            thread = []
            to_visit = [entry_id]
            thread_ids = set()
            level_map: dict[str, int] = {}  # Track levels for this thread

            # First, traverse up to find the root
            current = entry_id
            while current in self.inbound_refs:
                parents = self.inbound_refs[current] - {
                    current
                }  # Exclude self-references
                if not parents:
                    break
                # Take the first parent
                parent = next(iter(parents))
                if parent in thread_ids:  # Avoid cycles
                    break
                current = parent
                to_visit.insert(0, current)

            # Now traverse down from the root
            while to_visit:
                current = to_visit.pop(0)
                if current in thread_ids or current not in entry_map:
                    continue

                thread_ids.add(current)
                username, entry = entry_map[current]

                # Calculate thread level
                thread_level = self._calculate_thread_level(current, level_map)

                # Add threading metadata
                thread_entry = {
                    "username": username,
                    "display_name": self.get_display_name(username),
                    "entry": entry,
                    "entry_id": current,
                    "references_to": list(self.outbound_refs.get(current, [])),
                    "referenced_by": list(self.inbound_refs.get(current, [])),
                    "thread_level": thread_level,
                }
                thread.append(thread_entry)
                processed.add(current)

                # Add children
                if current in self.outbound_refs:
                    children = self.outbound_refs[current] - thread_ids  # Avoid cycles
                    to_visit.extend(sorted(children))

            if len(thread) > 1:  # Only keep actual threads
                # Sort thread by date (newest first) - prioritize updated over published
                thread.sort(key=lambda x: x["entry"].updated or x["entry"].published or datetime.min, reverse=True)  # type: ignore
                self.threads.append(thread)

        # Sort threads by the date of their most recent entry - prioritize updated over published
        self.threads.sort(
            key=lambda t: max(
                item["entry"].updated or item["entry"].published or datetime.min for item in t
            ),
            reverse=True,
        )

    def _calculate_thread_level(
        self, entry_id: str, processed_entries: dict[str, int]
    ) -> int:
        """Calculate indentation level for threaded display."""
        if entry_id in processed_entries:
            return processed_entries[entry_id]

        if entry_id not in self.inbound_refs:
            processed_entries[entry_id] = 0
            return 0

        parents_in_thread = self.inbound_refs[entry_id] & set(processed_entries.keys())
        if not parents_in_thread:
            processed_entries[entry_id] = 0
            return 0

        # Find the deepest parent level + 1
        max_parent_level = 0
        for parent_id in parents_in_thread:
            parent_level = self._calculate_thread_level(parent_id, processed_entries)
            max_parent_level = max(max_parent_level, parent_level)

        level = min(max_parent_level + 1, 4)  # Cap at level 4
        processed_entries[entry_id] = level
        return level

    def get_standalone_references(self) -> list[dict[str, Any]]:
        """Get posts that have references but aren't part of multi-post threads."""
        if not hasattr(self, "reference_details"):
            return []

        threaded_entry_ids = set()
        for thread in self.threads:
            for item in thread:
                threaded_entry_ids.add(item["entry_id"])

        standalone_refs = []
        for username, entry in self.entries:
            if (
                entry.id in self.reference_details
                and entry.id not in threaded_entry_ids
            ):
                refs = self.reference_details[entry.id]
                # Only include if it has meaningful references (not just self-references)
                meaningful_refs = [
                    r
                    for r in refs
                    if r.get("target_id") != entry.id and r.get("source_id") != entry.id
                ]
                if meaningful_refs:
                    standalone_refs.append(
                        {
                            "username": username,
                            "display_name": self.get_display_name(username),
                            "entry": entry,
                            "references": meaningful_refs,
                        }
                    )

        return standalone_refs

    def _add_cross_thread_links(self, timeline_items: list[dict[str, Any]]) -> None:
        """Add cross-thread linking for entries that appear in multiple threads."""
        # Map entry IDs to their positions in the timeline
        entry_positions: dict[str, list[int]] = {}
        # Map URLs referenced by entries to the entries that reference them
        url_references: dict[str, list[tuple[str, int]]] = {}  # url -> [(entry_id, position)]

        # First pass: collect all entry IDs, their positions, and referenced URLs
        for i, item in enumerate(timeline_items):
            if item["type"] == "post":
                entry_id = item["content"]["entry"].id
                entry_positions.setdefault(entry_id, []).append(i)
                # Track URLs this entry references
                if entry_id in self.reference_details:
                    for ref in self.reference_details[entry_id]:
                        if ref["type"] == "outbound" and "target_id" in ref:
                            # Find the target entry's URL if available
                            target_entry = self._find_entry_by_id(ref["target_id"])
                            if target_entry and target_entry.link:
                                url = str(target_entry.link)
                                url_references.setdefault(url, []).append((entry_id, i))
            elif item["type"] == "thread":
                for thread_item in item["content"]:
                    entry_id = thread_item["entry"].id
                    entry_positions.setdefault(entry_id, []).append(i)
                    # Track URLs this entry references
                    if entry_id in self.reference_details:
                        for ref in self.reference_details[entry_id]:
                            if ref["type"] == "outbound" and "target_id" in ref:
                                target_entry = self._find_entry_by_id(ref["target_id"])
                                if target_entry and target_entry.link:
                                    url = str(target_entry.link)
                                    url_references.setdefault(url, []).append((entry_id, i))

        # Build cross-thread connections - only for entries that actually appear multiple times
        cross_thread_connections: dict[str, set[int]] = {}  # entry_id -> set of timeline positions

        # Add connections ONLY for entries that appear multiple times in the timeline
        for entry_id, positions in entry_positions.items():
            if len(positions) > 1:
                cross_thread_connections[entry_id] = set(positions)
                # Debug: uncomment to see which entries have multiple appearances
                # print(f"Entry {entry_id[:50]}... appears at positions: {positions}")

        # Apply cross-thread links to timeline items
        for entry_id, positions_set in cross_thread_connections.items():
            positions_list = list(positions_set)
            for pos in positions_list:
                item = timeline_items[pos]
                other_positions = sorted([p for p in positions_list if p != pos])

                if item["type"] == "post":
                    # Add cross-thread info to individual posts
                    item["content"]["cross_thread_links"] = self._build_cross_thread_link_data(entry_id, other_positions, timeline_items)
                    # Add info about shared references
                    item["content"]["shared_references"] = self._get_shared_references(entry_id, positions_set, timeline_items)
                elif item["type"] == "thread":
                    # Add cross-thread info to thread items
                    for thread_item in item["content"]:
                        if thread_item["entry"].id == entry_id:
                            thread_item["cross_thread_links"] = self._build_cross_thread_link_data(entry_id, other_positions, timeline_items)
                            thread_item["shared_references"] = self._get_shared_references(entry_id, positions_set, timeline_items)
                            break

    def _build_cross_thread_link_data(self, entry_id: str, other_positions: list[int], timeline_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Build detailed cross-thread link data with anchor information."""
        cross_thread_links = []

        for pos in other_positions:
            item = timeline_items[pos]
            if item["type"] == "post":
                # For individual posts
                safe_id = safe_anchor_id(entry_id)
                cross_thread_links.append({
                    "position": pos,
                    "anchor_id": f"post-{pos}-{safe_id}",
                    "context": "individual post",
                    "title": item["content"]["entry"].title
                })
            elif item["type"] == "thread":
                # For thread items, find the specific thread item
                for thread_idx, thread_item in enumerate(item["content"]):
                    if thread_item["entry"].id == entry_id:
                        safe_id = safe_anchor_id(entry_id)
                        cross_thread_links.append({
                            "position": pos,
                            "anchor_id": f"post-{pos}-{thread_idx}-{safe_id}",
                            "context": f"thread (level {thread_item.get('thread_level', 0)})",
                            "title": thread_item["entry"].title
                        })
                        break

        return cross_thread_links

    def _find_entry_by_id(self, entry_id: str) -> Optional[AtomEntry]:
        """Find an entry by its ID."""
        for _username, entry in self.entries:
            if entry.id == entry_id:
                return entry
        return None

    def _get_shared_references(self, entry_id: str, positions: Union[set[int], list[int]], timeline_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Get information about shared references between cross-thread entries."""
        shared_refs = []

        # Collect all referenced URLs from entries at these positions
        url_counts: dict[str, int] = {}
        referencing_entries: dict[str, list[str]] = {}  # url -> [entry_ids]

        for pos in positions:
            item = timeline_items[pos]
            entries_to_check = []

            if item["type"] == "post":
                entries_to_check.append(item["content"]["entry"])
            elif item["type"] == "thread":
                entries_to_check.extend([ti["entry"] for ti in item["content"]])

            for entry in entries_to_check:
                if entry.id in self.reference_details:
                    for ref in self.reference_details[entry.id]:
                        if ref["type"] == "outbound" and "target_id" in ref:
                            target_entry = self._find_entry_by_id(ref["target_id"])
                            if target_entry and target_entry.link:
                                url = str(target_entry.link)
                                url_counts[url] = url_counts.get(url, 0) + 1
                                if url not in referencing_entries:
                                    referencing_entries[url] = []
                                if entry.id not in referencing_entries[url]:
                                    referencing_entries[url].append(entry.id)

        # Find URLs referenced by multiple entries
        for url, count in url_counts.items():
            if count > 1 and len(referencing_entries[url]) > 1:
                # Get the target entry info
                target_entry = None
                target_username = None
                for ref in (self.links_data or {}).get("references", []):
                    if ref.get("target_url") == url:
                        target_username = ref.get("target_username")
                        if ref.get("target_entry_id"):
                            target_entry = self._find_entry_by_id(ref["target_entry_id"])
                        break

                shared_refs.append({
                    "url": url,
                    "count": count,
                    "referencing_entries": referencing_entries[url],
                    "target_username": target_username,
                    "target_title": target_entry.title if target_entry else None
                })

        return sorted(shared_refs, key=lambda x: x["count"], reverse=True)

    def generate_site(self) -> None:
        """Generate the static website."""
        # Create output directory
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Create static directories
        (self.output_dir / "css").mkdir(exist_ok=True)
        (self.output_dir / "js").mkdir(exist_ok=True)

        # Generate CSS
        css_template = self.env.get_template("style.css")
        css_content = css_template.render()
        with open(self.output_dir / "css" / "style.css", "w") as f:
            f.write(css_content)

        # Generate JavaScript
        js_template = self.env.get_template("script.js")
        js_content = js_template.render()
        with open(self.output_dir / "js" / "script.js", "w") as f:
            f.write(js_content)

        # Prepare common template data
        base_data = {
            "title": "Energy & Environment Group",
            "generated_at": datetime.now().isoformat(),
            "get_display_name": self.get_display_name,
            "get_user_homepage": self.get_user_homepage,
            "clean_html_summary": self.clean_html_summary,
            "safe_anchor_id": safe_anchor_id,
        }

        # Build unified timeline
        timeline_items = []

        # Only consider the threads that will actually be displayed
        displayed_threads = self.threads[:20]  # Limit to 20 threads

        # Track which entries are part of displayed threads
        threaded_entry_ids = set()
        for thread in displayed_threads:
            for item in thread:
                threaded_entry_ids.add(item["entry_id"])

        # Add threads to timeline (using the date of the most recent post)
        for thread in displayed_threads:
            most_recent_date = max(
                item["entry"].updated or item["entry"].published or datetime.min
                for item in thread
            )
            timeline_items.append({
                "type": "thread",
                "date": most_recent_date,
                "content": thread
            })

        # Add individual posts (not in threads)
        for username, entry in self.entries[:50]:
            if entry.id not in threaded_entry_ids:
                # Check if this entry has references
                has_refs = (
                    entry.id in self.reference_details
                    if hasattr(self, "reference_details")
                    else False
                )

                refs = []
                if has_refs:
                    refs = self.reference_details.get(entry.id, [])
                    refs = [
                        r for r in refs
                        if r.get("target_id") != entry.id
                        and r.get("source_id") != entry.id
                    ]

                timeline_items.append({
                    "type": "post",
                    "date": entry.updated or entry.published or datetime.min,
                    "content": {
                        "username": username,
                        "display_name": self.get_display_name(username),
                        "entry": entry,
                        "references": refs if refs else None
                    }
                })

        # Sort unified timeline by date (newest first)
        timeline_items.sort(key=lambda x: x["date"], reverse=True)

        # Limit timeline to what will actually be rendered
        timeline_items = timeline_items[:50]  # Limit to 50 items total

        # Add cross-thread linking for repeat blog references
        self._add_cross_thread_links(timeline_items)

        # Prepare outgoing links data
        outgoing_links = []
        if self.links_data and "links" in self.links_data:
            for url, link_info in self.links_data["links"].items():
                referencing_entries = []
                for entry_id in link_info.get("referencing_entries", []):
                    for username, entry in self.entries:
                        if entry.id == entry_id:
                            referencing_entries.append(
                                (self.get_display_name(username), entry)
                            )
                            break

                if referencing_entries:
                    # Sort by date - prioritize updated over published
                    referencing_entries.sort(
                        key=lambda x: x[1].updated or x[1].published or datetime.min, reverse=True
                    )
                    outgoing_links.append(
                        {
                            "url": url,
                            "target_username": link_info.get("target_username"),
                            "entries": referencing_entries,
                        }
                    )

        # Sort links by most recent reference - prioritize updated over published
        outgoing_links.sort(
            key=lambda x: x["entries"][0][1].updated
            or x["entries"][0][1].published or datetime.min,
            reverse=True,
        )

        # Prepare users data
        users: list[UserData] = []
        if self.index:
            for username, user_metadata in self.index.users.items():
                # Get recent entries for this user with display names
                user_entries = [
                    (self.get_display_name(u), e)
                    for u, e in self.entries
                    if u == username
                ][:5]
                users.append(
                    {"metadata": user_metadata, "recent_entries": user_entries}
                )
            # Sort by entry count
            users.sort(key=lambda x: x["metadata"].entry_count, reverse=True)

        # Generate timeline page
        timeline_template = self.env.get_template("timeline.html")
        timeline_content = timeline_template.render(
            **base_data,
            page="timeline",
            timeline_items=timeline_items,  # Already limited above
        )
        with open(self.output_dir / "timeline.html", "w") as f:
            f.write(timeline_content)

        # Generate links page
        links_template = self.env.get_template("links.html")
        links_content = links_template.render(
            **base_data,
            page="links",
outgoing_links=outgoing_links[:100], 738 + ) 739 + with open(self.output_dir / "links.html", "w") as f: 740 + f.write(links_content) 741 + 742 + # Generate users page 743 + users_template = self.env.get_template("users.html") 744 + users_content = users_template.render( 745 + **base_data, 746 + page="users", 747 + users=users, 748 + ) 749 + with open(self.output_dir / "users.html", "w") as f: 750 + f.write(users_content) 751 + 752 + # Generate main index page (redirect to timeline) 753 + index_template = self.env.get_template("index.html") 754 + index_content = index_template.render(**base_data) 755 + with open(self.output_dir / "index.html", "w") as f: 756 + f.write(index_content) 757 + 758 + console.print(f"[green]✓[/green] Generated website at {self.output_dir}") 759 + console.print(f" - {len(self.entries)} entries") 760 + console.print(f" - {len(self.threads)} conversation threads") 761 + console.print(f" - {len(outgoing_links)} outgoing links") 762 + console.print(f" - {len(users)} users") 763 + console.print( 764 + " - Generated pages: index.html, timeline.html, links.html, users.html" 765 + ) 766 + 767 + 768 + @app.command() 769 + def generate( 770 + output: Path = typer.Option( 771 + Path("./thicket-site"), 772 + "--output", 773 + "-o", 774 + help="Output directory for the generated website", 775 + ), 776 + force: bool = typer.Option( 777 + False, "--force", "-f", help="Overwrite existing output directory" 778 + ), 779 + config_file: Path = typer.Option( 780 + Path("thicket.yaml"), "--config", help="Configuration file path" 781 + ), 782 + ) -> None: 783 + """Generate a static HTML website from thicket data.""" 784 + config = load_config(config_file) 785 + 786 + if not config.git_store: 787 + console.print("[red]No git store path configured[/red]") 788 + raise typer.Exit(1) 789 + 790 + git_store = GitStore(config.git_store) 791 + 792 + # Check if output directory exists 793 + if output.exists() and not force: 794 + console.print( 795 + f"[red]Output 
directory {output} already exists. Use --force to overwrite.[/red]" 796 + ) 797 + raise typer.Exit(1) 798 + 799 + # Clean output directory if forcing 800 + if output.exists() and force: 801 + shutil.rmtree(output) 802 + 803 + try: 804 + generator = WebsiteGenerator(git_store, output) 805 + 806 + console.print("[bold]Generating static website...[/bold]") 807 + generator.load_data() 808 + generator.build_threads() 809 + generator.generate_site() 810 + 811 + except Exception as e: 812 + console.print(f"[red]Error generating website: {e}[/red]") 813 + raise typer.Exit(1) from e 814 + </file> 815 + 816 + <file path="src/thicket/templates/base.html"> 817 + <!DOCTYPE html> 818 + <html lang="en"> 819 + <head> 820 + <meta charset="UTF-8"> 821 + <meta name="viewport" content="width=device-width, initial-scale=1.0"> 822 + <title>{% block page_title %}{{ title }}{% endblock %}</title> 823 + <link rel="stylesheet" href="css/style.css"> 824 + </head> 825 + <body> 826 + <header class="site-header"> 827 + <div class="header-content"> 828 + <h1 class="site-title">{{ title }}</h1> 829 + <nav class="site-nav"> 830 + <a href="timeline.html" class="nav-link {% if page == 'timeline' %}active{% endif %}">Timeline</a> 831 + <a href="links.html" class="nav-link {% if page == 'links' %}active{% endif %}">Links</a> 832 + <a href="users.html" class="nav-link {% if page == 'users' %}active{% endif %}">Users</a> 833 + </nav> 834 + </div> 835 + </header> 836 + 837 + <main class="main-content"> 838 + {% block content %}{% endblock %} 839 + </main> 840 + 841 + <footer class="site-footer"> 842 + <p>Generated on {{ generated_at }} by <a href="https://github.com/avsm/thicket">Thicket</a></p> 843 + </footer> 844 + 845 + <script src="js/script.js"></script> 846 + </body> 847 + </html> 848 + </file> 849 + 850 + <file path="src/thicket/templates/index.html"> 851 + <!DOCTYPE html> 852 + <html lang="en"> 853 + <head> 854 + <meta charset="UTF-8"> 855 + <meta name="viewport" content="width=device-width, 
initial-scale=1.0">
856 + <title>{{ title }}</title>
857 + <meta http-equiv="refresh" content="0; url=timeline.html">
858 + <link rel="canonical" href="timeline.html">
859 + </head>
860 + <body>
861 + <p>Redirecting to <a href="timeline.html">Timeline</a>...</p>
862 + </body>
863 + </html>
864 + </file>
865 + 
866 + <file path="src/thicket/templates/links.html">
867 + {% extends "base.html" %}
868 + 
869 + {% block page_title %}Outgoing Links - {{ title }}{% endblock %}
870 + 
871 + {% block content %}
872 + <div class="page-content">
873 + <h2>Outgoing Links</h2>
874 + <p class="page-description">External links referenced in blog posts, ordered by most recent reference.</p>
875 + 
876 + {% for link in outgoing_links %}
877 + <article class="link-group">
878 + <h3 class="link-url">
879 + <a href="{{ link.url }}" target="_blank">{{ link.url|truncate(80) }}</a>
880 + {% if link.target_username %}
881 + <span class="target-user">({{ link.target_username }})</span>
882 + {% endif %}
883 + </h3>
884 + <div class="referencing-entries">
885 + <span class="ref-count">Referenced in {{ link.entries|length }} post(s):</span>
886 + <ul>
887 + {% for display_name, entry in link.entries[:5] %}
888 + <li>
889 + <span class="author">{{ display_name }}</span> -
890 + <a href="{{ entry.link }}" target="_blank">{{ entry.title }}</a>
891 + <time datetime="{{ (entry.updated or entry.published).isoformat() }}">
892 + ({{ (entry.updated or entry.published).strftime('%Y-%m-%d') }})
893 + </time>
894 + </li>
895 + {% endfor %}
896 + {% if link.entries|length > 5 %}
897 + <li class="more">...
and {{ link.entries|length - 5 }} more</li>
898 + {% endif %}
899 + </ul>
900 + </div>
901 + </article>
902 + {% endfor %}
903 + </div>
904 + {% endblock %}
905 + </file>
906 + 
907 + <file path="src/thicket/templates/script.js">
908 + // Enhanced functionality for thicket website
909 + document.addEventListener('DOMContentLoaded', function() {
910 + 
911 + // Enhance thread collapsing (optional feature)
912 + const threadHeaders = document.querySelectorAll('.thread-header');
913 + threadHeaders.forEach(header => {
914 + header.style.cursor = 'pointer';
915 + header.addEventListener('click', function() {
916 + const thread = this.parentElement;
917 + const entries = thread.querySelectorAll('.thread-entry');
918 + 
919 + // Toggle visibility of all but the first entry
920 + for (let i = 1; i < entries.length; i++) {
921 + entries[i].style.display = entries[i].style.display === 'none' ? 'block' : 'none';
922 + }
923 + 
924 + // Update thread count text (guard against headers without a count element)
925 + const count = this.querySelector('.thread-count');
926 + if (count && entries[1] && entries[1].style.display === 'none') {
927 + count.textContent = count.textContent.replace('posts', 'posts (collapsed)');
928 + } else if (count) {
929 + count.textContent = count.textContent.replace(' (collapsed)', '');
930 + }
931 + });
932 + });
933 + 
934 + // Add relative time display
935 + const timeElements = document.querySelectorAll('time');
936 + timeElements.forEach(timeEl => {
937 + const datetime = new Date(timeEl.getAttribute('datetime'));
938 + const now = new Date();
939 + const diffMs = now - datetime;
940 + const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));
941 + 
942 + let relativeTime;
943 + if (diffDays === 0) {
944 + const diffHours = Math.floor(diffMs / (1000 * 60 * 60));
945 + if (diffHours === 0) {
946 + const diffMinutes = Math.floor(diffMs / (1000 * 60));
947 + relativeTime = diffMinutes === 0 ?
'just now' : `${diffMinutes}m ago`;
948 + } else {
949 + relativeTime = `${diffHours}h ago`;
950 + }
951 + } else if (diffDays === 1) {
952 + relativeTime = 'yesterday';
953 + } else if (diffDays < 7) {
954 + relativeTime = `${diffDays}d ago`;
955 + } else if (diffDays < 30) {
956 + const weeks = Math.floor(diffDays / 7);
957 + relativeTime = weeks === 1 ? '1w ago' : `${weeks}w ago`;
958 + } else if (diffDays < 365) {
959 + const months = Math.floor(diffDays / 30);
960 + relativeTime = months === 1 ? '1mo ago' : `${months}mo ago`;
961 + } else {
962 + const years = Math.floor(diffDays / 365);
963 + relativeTime = years === 1 ? '1y ago' : `${years}y ago`;
964 + }
965 + 
966 + // Show the relative time inline, keeping the absolute timestamp as a tooltip
967 + timeEl.setAttribute('title', timeEl.textContent);
968 + timeEl.textContent = relativeTime;
969 + });
970 + 
971 + // Enhanced anchor link scrolling for shared references
972 + document.querySelectorAll('a[href^="#"]').forEach(anchor => {
973 + anchor.addEventListener('click', function (e) {
974 + e.preventDefault();
975 + const target = document.getElementById(this.getAttribute('href').slice(1));
976 + if (target) {
977 + target.scrollIntoView({
978 + behavior: 'smooth',
979 + block: 'center'
980 + });
981 + 
982 + // Highlight the target briefly
983 + const timelineEntry = target.closest('.timeline-entry');
984 + if (timelineEntry) {
985 + timelineEntry.style.outline = '2px solid var(--primary-color)';
986 + timelineEntry.style.borderRadius = '8px';
987 + setTimeout(() => {
988 + timelineEntry.style.outline = '';
989 + timelineEntry.style.borderRadius = '';
990 + }, 2000);
991 + }
992 + }
993 + });
994 + });
995 + });
996 + </file>
997 + 
998 + <file path="src/thicket/templates/style.css">
999 + /* Modern, clean design with high-density text and readable theme */
1000 + 
1001 + :root {
1002 + --primary-color: #2c3e50;
1003 + --secondary-color: #3498db;
1004 + --accent-color: #e74c3c;
1005 + --background: #ffffff;
1006 + --surface: #f8f9fa;
1007 + --text-primary:
#2c3e50; 1008 + --text-secondary: #7f8c8d; 1009 + --border-color: #e0e0e0; 1010 + --thread-indent: 20px; 1011 + --max-width: 1200px; 1012 + } 1013 + 1014 + * { 1015 + margin: 0; 1016 + padding: 0; 1017 + box-sizing: border-box; 1018 + } 1019 + 1020 + body { 1021 + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif; 1022 + font-size: 14px; 1023 + line-height: 1.6; 1024 + color: var(--text-primary); 1025 + background-color: var(--background); 1026 + } 1027 + 1028 + /* Header */ 1029 + .site-header { 1030 + background-color: var(--surface); 1031 + border-bottom: 1px solid var(--border-color); 1032 + padding: 0.75rem 0; 1033 + position: sticky; 1034 + top: 0; 1035 + z-index: 100; 1036 + } 1037 + 1038 + .header-content { 1039 + max-width: var(--max-width); 1040 + margin: 0 auto; 1041 + padding: 0 2rem; 1042 + display: flex; 1043 + justify-content: space-between; 1044 + align-items: center; 1045 + } 1046 + 1047 + .site-title { 1048 + font-size: 1.5rem; 1049 + font-weight: 600; 1050 + color: var(--primary-color); 1051 + margin: 0; 1052 + } 1053 + 1054 + /* Navigation */ 1055 + .site-nav { 1056 + display: flex; 1057 + gap: 1.5rem; 1058 + } 1059 + 1060 + .nav-link { 1061 + text-decoration: none; 1062 + color: var(--text-secondary); 1063 + font-weight: 500; 1064 + font-size: 0.95rem; 1065 + padding: 0.5rem 0.75rem; 1066 + border-radius: 4px; 1067 + transition: all 0.2s ease; 1068 + } 1069 + 1070 + .nav-link:hover { 1071 + color: var(--primary-color); 1072 + background-color: var(--background); 1073 + } 1074 + 1075 + .nav-link.active { 1076 + color: var(--secondary-color); 1077 + background-color: var(--background); 1078 + font-weight: 600; 1079 + } 1080 + 1081 + /* Main Content */ 1082 + .main-content { 1083 + max-width: var(--max-width); 1084 + margin: 2rem auto; 1085 + padding: 0 2rem; 1086 + } 1087 + 1088 + .page-content { 1089 + margin: 0; 1090 + } 1091 + 1092 + .page-description { 1093 + color: 
var(--text-secondary); 1094 + margin-bottom: 1.5rem; 1095 + font-style: italic; 1096 + } 1097 + 1098 + /* Sections */ 1099 + section { 1100 + margin-bottom: 2rem; 1101 + } 1102 + 1103 + h2 { 1104 + font-size: 1.3rem; 1105 + font-weight: 600; 1106 + margin-bottom: 0.75rem; 1107 + color: var(--primary-color); 1108 + } 1109 + 1110 + h3 { 1111 + font-size: 1.1rem; 1112 + font-weight: 600; 1113 + margin-bottom: 0.75rem; 1114 + color: var(--primary-color); 1115 + } 1116 + 1117 + /* Entries and Threads */ 1118 + article { 1119 + margin-bottom: 1.5rem; 1120 + padding: 1rem; 1121 + background-color: var(--surface); 1122 + border-radius: 4px; 1123 + border: 1px solid var(--border-color); 1124 + } 1125 + 1126 + /* Timeline-style entries */ 1127 + .timeline-entry { 1128 + margin-bottom: 0.5rem; 1129 + padding: 0.5rem 0.75rem; 1130 + border: none; 1131 + background: transparent; 1132 + transition: background-color 0.2s ease; 1133 + } 1134 + 1135 + .timeline-entry:hover { 1136 + background-color: var(--surface); 1137 + } 1138 + 1139 + .timeline-meta { 1140 + display: inline-flex; 1141 + gap: 0.5rem; 1142 + align-items: center; 1143 + font-size: 0.75rem; 1144 + color: var(--text-secondary); 1145 + margin-bottom: 0.25rem; 1146 + } 1147 + 1148 + .timeline-time { 1149 + font-family: 'SF Mono', Monaco, Consolas, 'Courier New', monospace; 1150 + font-size: 0.75rem; 1151 + color: var(--text-secondary); 1152 + } 1153 + 1154 + .timeline-author { 1155 + font-weight: 600; 1156 + color: var(--primary-color); 1157 + font-size: 0.8rem; 1158 + text-decoration: none; 1159 + } 1160 + 1161 + .timeline-author:hover { 1162 + color: var(--secondary-color); 1163 + text-decoration: underline; 1164 + } 1165 + 1166 + .timeline-content { 1167 + line-height: 1.4; 1168 + } 1169 + 1170 + .timeline-title { 1171 + font-size: 0.95rem; 1172 + font-weight: 600; 1173 + } 1174 + 1175 + .timeline-title a { 1176 + color: var(--primary-color); 1177 + text-decoration: none; 1178 + } 1179 + 1180 + .timeline-title 
a:hover { 1181 + color: var(--secondary-color); 1182 + text-decoration: underline; 1183 + } 1184 + 1185 + .timeline-summary { 1186 + color: var(--text-secondary); 1187 + font-size: 0.9rem; 1188 + line-height: 1.4; 1189 + } 1190 + 1191 + /* Legacy styles for other sections */ 1192 + .entry-meta, .thread-header { 1193 + display: flex; 1194 + gap: 1rem; 1195 + align-items: center; 1196 + margin-bottom: 0.5rem; 1197 + font-size: 0.85rem; 1198 + color: var(--text-secondary); 1199 + } 1200 + 1201 + .author { 1202 + font-weight: 600; 1203 + color: var(--primary-color); 1204 + } 1205 + 1206 + time { 1207 + font-size: 0.85rem; 1208 + } 1209 + 1210 + h4 { 1211 + font-size: 1.1rem; 1212 + font-weight: 600; 1213 + margin-bottom: 0.5rem; 1214 + } 1215 + 1216 + h4 a { 1217 + color: var(--primary-color); 1218 + text-decoration: none; 1219 + } 1220 + 1221 + h4 a:hover { 1222 + color: var(--secondary-color); 1223 + text-decoration: underline; 1224 + } 1225 + 1226 + .entry-summary { 1227 + color: var(--text-primary); 1228 + line-height: 1.5; 1229 + margin-top: 0.5rem; 1230 + } 1231 + 1232 + /* Enhanced Threading Styles */ 1233 + 1234 + /* Conversation Clusters */ 1235 + .conversation-cluster { 1236 + background-color: var(--background); 1237 + border: 2px solid var(--border-color); 1238 + border-radius: 8px; 1239 + margin-bottom: 2rem; 1240 + overflow: hidden; 1241 + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05); 1242 + } 1243 + 1244 + .conversation-header { 1245 + background: linear-gradient(135deg, var(--surface) 0%, #f1f3f4 100%); 1246 + padding: 0.75rem 1rem; 1247 + border-bottom: 1px solid var(--border-color); 1248 + } 1249 + 1250 + .conversation-meta { 1251 + display: flex; 1252 + justify-content: space-between; 1253 + align-items: center; 1254 + flex-wrap: wrap; 1255 + gap: 0.5rem; 1256 + } 1257 + 1258 + .conversation-count { 1259 + font-weight: 600; 1260 + color: var(--secondary-color); 1261 + font-size: 0.9rem; 1262 + } 1263 + 1264 + .conversation-participants { 1265 + 
font-size: 0.8rem; 1266 + color: var(--text-secondary); 1267 + flex: 1; 1268 + text-align: right; 1269 + } 1270 + 1271 + .conversation-flow { 1272 + padding: 0.5rem; 1273 + } 1274 + 1275 + /* Threaded Conversation Entries */ 1276 + .conversation-entry { 1277 + position: relative; 1278 + margin-bottom: 0.75rem; 1279 + display: flex; 1280 + align-items: flex-start; 1281 + } 1282 + 1283 + .conversation-entry.level-0 { 1284 + margin-left: 0; 1285 + } 1286 + 1287 + .conversation-entry.level-1 { 1288 + margin-left: 1.5rem; 1289 + } 1290 + 1291 + .conversation-entry.level-2 { 1292 + margin-left: 3rem; 1293 + } 1294 + 1295 + .conversation-entry.level-3 { 1296 + margin-left: 4.5rem; 1297 + } 1298 + 1299 + .conversation-entry.level-4 { 1300 + margin-left: 6rem; 1301 + } 1302 + 1303 + .entry-connector { 1304 + width: 3px; 1305 + background-color: var(--secondary-color); 1306 + margin-right: 0.75rem; 1307 + margin-top: 0.25rem; 1308 + min-height: 2rem; 1309 + border-radius: 2px; 1310 + opacity: 0.6; 1311 + } 1312 + 1313 + .conversation-entry.level-0 .entry-connector { 1314 + background-color: var(--accent-color); 1315 + opacity: 0.8; 1316 + } 1317 + 1318 + .entry-content { 1319 + flex: 1; 1320 + background-color: var(--surface); 1321 + padding: 0.75rem; 1322 + border-radius: 6px; 1323 + border: 1px solid var(--border-color); 1324 + transition: all 0.2s ease; 1325 + } 1326 + 1327 + .entry-content:hover { 1328 + border-color: var(--secondary-color); 1329 + box-shadow: 0 2px 8px rgba(52, 152, 219, 0.1); 1330 + } 1331 + 1332 + /* Reference Indicators */ 1333 + .reference-indicators { 1334 + display: inline-flex; 1335 + gap: 0.25rem; 1336 + margin-left: 0.5rem; 1337 + } 1338 + 1339 + .ref-out, .ref-in { 1340 + display: inline-block; 1341 + width: 1rem; 1342 + height: 1rem; 1343 + border-radius: 50%; 1344 + text-align: center; 1345 + line-height: 1rem; 1346 + font-size: 0.7rem; 1347 + font-weight: bold; 1348 + } 1349 + 1350 + .ref-out { 1351 + background-color: #e8f5e8; 1352 + 
color: #2d8f2d; 1353 + } 1354 + 1355 + .ref-in { 1356 + background-color: #e8f0ff; 1357 + color: #1f5fbf; 1358 + } 1359 + 1360 + /* Reference Badges for Individual Posts */ 1361 + .timeline-entry.with-references { 1362 + background-color: var(--surface); 1363 + } 1364 + 1365 + /* Conversation posts in unified timeline */ 1366 + .timeline-entry.conversation-post { 1367 + background: transparent; 1368 + border: none; 1369 + margin-bottom: 0.5rem; 1370 + padding: 0.5rem 0.75rem; 1371 + } 1372 + 1373 + .timeline-entry.conversation-post.level-0 { 1374 + margin-left: 0; 1375 + border-left: 2px solid var(--accent-color); 1376 + padding-left: 0.75rem; 1377 + } 1378 + 1379 + .timeline-entry.conversation-post.level-1 { 1380 + margin-left: 1.5rem; 1381 + border-left: 2px solid var(--secondary-color); 1382 + padding-left: 0.75rem; 1383 + } 1384 + 1385 + .timeline-entry.conversation-post.level-2 { 1386 + margin-left: 3rem; 1387 + border-left: 2px solid var(--text-secondary); 1388 + padding-left: 0.75rem; 1389 + } 1390 + 1391 + .timeline-entry.conversation-post.level-3 { 1392 + margin-left: 4.5rem; 1393 + border-left: 2px solid var(--text-secondary); 1394 + padding-left: 0.75rem; 1395 + } 1396 + 1397 + .timeline-entry.conversation-post.level-4 { 1398 + margin-left: 6rem; 1399 + border-left: 2px solid var(--text-secondary); 1400 + padding-left: 0.75rem; 1401 + } 1402 + 1403 + /* Cross-thread linking */ 1404 + .cross-thread-links { 1405 + margin-top: 0.5rem; 1406 + padding-top: 0.5rem; 1407 + border-top: 1px solid var(--border-color); 1408 + } 1409 + 1410 + .cross-thread-indicator { 1411 + font-size: 0.75rem; 1412 + color: var(--text-secondary); 1413 + background-color: var(--surface); 1414 + padding: 0.25rem 0.5rem; 1415 + border-radius: 12px; 1416 + border: 1px solid var(--border-color); 1417 + display: inline-block; 1418 + } 1419 + 1420 + /* Inline shared references styling */ 1421 + .inline-shared-refs { 1422 + margin-left: 0.5rem; 1423 + font-size: 0.85rem; 1424 + color: 
var(--text-secondary); 1425 + } 1426 + 1427 + .shared-ref-link { 1428 + color: var(--primary-color); 1429 + text-decoration: none; 1430 + font-weight: 500; 1431 + transition: color 0.2s ease; 1432 + } 1433 + 1434 + .shared-ref-link:hover { 1435 + color: var(--secondary-color); 1436 + text-decoration: underline; 1437 + } 1438 + 1439 + .shared-ref-more { 1440 + font-style: italic; 1441 + color: var(--text-secondary); 1442 + font-size: 0.8rem; 1443 + margin-left: 0.25rem; 1444 + } 1445 + 1446 + .user-anchor, .post-anchor { 1447 + position: absolute; 1448 + margin-top: -60px; /* Offset for fixed header */ 1449 + pointer-events: none; 1450 + } 1451 + 1452 + .cross-thread-link { 1453 + color: var(--primary-color); 1454 + text-decoration: none; 1455 + font-weight: 500; 1456 + transition: color 0.2s ease; 1457 + } 1458 + 1459 + .cross-thread-link:hover { 1460 + color: var(--secondary-color); 1461 + text-decoration: underline; 1462 + } 1463 + 1464 + .reference-badges { 1465 + display: flex; 1466 + gap: 0.25rem; 1467 + margin-left: 0.5rem; 1468 + flex-wrap: wrap; 1469 + } 1470 + 1471 + .ref-badge { 1472 + display: inline-block; 1473 + padding: 0.1rem 0.4rem; 1474 + border-radius: 12px; 1475 + font-size: 0.7rem; 1476 + font-weight: 600; 1477 + text-transform: uppercase; 1478 + letter-spacing: 0.05em; 1479 + } 1480 + 1481 + .ref-badge.ref-outbound { 1482 + background-color: #e8f5e8; 1483 + color: #2d8f2d; 1484 + border: 1px solid #c3e6c3; 1485 + } 1486 + 1487 + .ref-badge.ref-inbound { 1488 + background-color: #e8f0ff; 1489 + color: #1f5fbf; 1490 + border: 1px solid #b3d9ff; 1491 + } 1492 + 1493 + /* Author Color Coding */ 1494 + .timeline-author { 1495 + position: relative; 1496 + } 1497 + 1498 + .timeline-author::before { 1499 + content: ''; 1500 + display: inline-block; 1501 + width: 8px; 1502 + height: 8px; 1503 + border-radius: 50%; 1504 + margin-right: 0.5rem; 1505 + background-color: var(--secondary-color); 1506 + } 1507 + 1508 + /* Generate consistent colors for 
authors */ 1509 + .author-avsm::before { background-color: #e74c3c; } 1510 + .author-mort::before { background-color: #3498db; } 1511 + .author-mte::before { background-color: #2ecc71; } 1512 + .author-ryan::before { background-color: #f39c12; } 1513 + .author-mwd::before { background-color: #9b59b6; } 1514 + .author-dra::before { background-color: #1abc9c; } 1515 + .author-pf341::before { background-color: #34495e; } 1516 + .author-sadiqj::before { background-color: #e67e22; } 1517 + .author-martinkl::before { background-color: #8e44ad; } 1518 + .author-jonsterling::before { background-color: #27ae60; } 1519 + .author-jon::before { background-color: #f1c40f; } 1520 + .author-onkar::before { background-color: #e91e63; } 1521 + .author-gabriel::before { background-color: #00bcd4; } 1522 + .author-jess::before { background-color: #ff5722; } 1523 + .author-ibrahim::before { background-color: #607d8b; } 1524 + .author-andres::before { background-color: #795548; } 1525 + .author-eeg::before { background-color: #ff9800; } 1526 + 1527 + /* Section Headers */ 1528 + .conversations-section h3, 1529 + .referenced-posts-section h3, 1530 + .individual-posts-section h3 { 1531 + border-bottom: 2px solid var(--border-color); 1532 + padding-bottom: 0.5rem; 1533 + margin-bottom: 1.5rem; 1534 + position: relative; 1535 + } 1536 + 1537 + .conversations-section h3::before { 1538 + content: "💬"; 1539 + margin-right: 0.5rem; 1540 + } 1541 + 1542 + .referenced-posts-section h3::before { 1543 + content: "🔗"; 1544 + margin-right: 0.5rem; 1545 + } 1546 + 1547 + .individual-posts-section h3::before { 1548 + content: "📝"; 1549 + margin-right: 0.5rem; 1550 + } 1551 + 1552 + /* Legacy thread styles (for backward compatibility) */ 1553 + .thread { 1554 + background-color: var(--background); 1555 + border: 1px solid var(--border-color); 1556 + padding: 0; 1557 + overflow: hidden; 1558 + margin-bottom: 1rem; 1559 + } 1560 + 1561 + .thread-header { 1562 + background-color: var(--surface); 1563 + 
padding: 0.5rem 0.75rem; 1564 + border-bottom: 1px solid var(--border-color); 1565 + } 1566 + 1567 + .thread-count { 1568 + font-weight: 600; 1569 + color: var(--secondary-color); 1570 + } 1571 + 1572 + .thread-entry { 1573 + padding: 0.5rem 0.75rem; 1574 + border-bottom: 1px solid var(--border-color); 1575 + } 1576 + 1577 + .thread-entry:last-child { 1578 + border-bottom: none; 1579 + } 1580 + 1581 + .thread-entry.reply { 1582 + margin-left: var(--thread-indent); 1583 + border-left: 3px solid var(--secondary-color); 1584 + background-color: var(--surface); 1585 + } 1586 + 1587 + /* Links Section */ 1588 + .link-group { 1589 + background-color: var(--background); 1590 + } 1591 + 1592 + .link-url { 1593 + font-size: 1rem; 1594 + word-break: break-word; 1595 + } 1596 + 1597 + .link-url a { 1598 + color: var(--secondary-color); 1599 + text-decoration: none; 1600 + } 1601 + 1602 + .link-url a:hover { 1603 + text-decoration: underline; 1604 + } 1605 + 1606 + .target-user { 1607 + font-size: 0.9rem; 1608 + color: var(--text-secondary); 1609 + font-weight: normal; 1610 + } 1611 + 1612 + .referencing-entries { 1613 + margin-top: 0.75rem; 1614 + } 1615 + 1616 + .ref-count { 1617 + font-weight: 600; 1618 + color: var(--text-secondary); 1619 + font-size: 0.9rem; 1620 + } 1621 + 1622 + .referencing-entries ul { 1623 + list-style: none; 1624 + margin-top: 0.5rem; 1625 + padding-left: 1rem; 1626 + } 1627 + 1628 + .referencing-entries li { 1629 + margin-bottom: 0.25rem; 1630 + font-size: 0.9rem; 1631 + } 1632 + 1633 + .referencing-entries .more { 1634 + font-style: italic; 1635 + color: var(--text-secondary); 1636 + } 1637 + 1638 + /* Users Section */ 1639 + .user-card { 1640 + background-color: var(--background); 1641 + } 1642 + 1643 + .user-header { 1644 + display: flex; 1645 + gap: 1rem; 1646 + align-items: start; 1647 + margin-bottom: 1rem; 1648 + } 1649 + 1650 + .user-icon { 1651 + width: 48px; 1652 + height: 48px; 1653 + border-radius: 50%; 1654 + object-fit: cover; 1655 + 
} 1656 + 1657 + .user-info h3 { 1658 + margin-bottom: 0.25rem; 1659 + } 1660 + 1661 + .username { 1662 + font-size: 0.9rem; 1663 + color: var(--text-secondary); 1664 + font-weight: normal; 1665 + } 1666 + 1667 + .user-meta { 1668 + font-size: 0.9rem; 1669 + color: var(--text-secondary); 1670 + } 1671 + 1672 + .user-meta a { 1673 + color: var(--secondary-color); 1674 + text-decoration: none; 1675 + } 1676 + 1677 + .user-meta a:hover { 1678 + text-decoration: underline; 1679 + } 1680 + 1681 + .separator { 1682 + margin: 0 0.5rem; 1683 + } 1684 + 1685 + .post-count { 1686 + font-weight: 600; 1687 + } 1688 + 1689 + .user-recent h4 { 1690 + font-size: 0.95rem; 1691 + margin-bottom: 0.5rem; 1692 + color: var(--text-secondary); 1693 + } 1694 + 1695 + .user-recent ul { 1696 + list-style: none; 1697 + padding-left: 0; 1698 + } 1699 + 1700 + .user-recent li { 1701 + margin-bottom: 0.25rem; 1702 + font-size: 0.9rem; 1703 + } 1704 + 1705 + /* Footer */ 1706 + .site-footer { 1707 + max-width: var(--max-width); 1708 + margin: 3rem auto 2rem; 1709 + padding: 1rem 2rem; 1710 + text-align: center; 1711 + color: var(--text-secondary); 1712 + font-size: 0.85rem; 1713 + border-top: 1px solid var(--border-color); 1714 + } 1715 + 1716 + .site-footer a { 1717 + color: var(--secondary-color); 1718 + text-decoration: none; 1719 + } 1720 + 1721 + .site-footer a:hover { 1722 + text-decoration: underline; 1723 + } 1724 + 1725 + /* Responsive */ 1726 + @media (max-width: 768px) { 1727 + .site-title { 1728 + font-size: 1.3rem; 1729 + } 1730 + 1731 + .header-content { 1732 + flex-direction: column; 1733 + gap: 0.75rem; 1734 + align-items: flex-start; 1735 + } 1736 + 1737 + .site-nav { 1738 + gap: 1rem; 1739 + } 1740 + 1741 + .main-content { 1742 + padding: 0 1rem; 1743 + } 1744 + 1745 + .thread-entry.reply { 1746 + margin-left: calc(var(--thread-indent) / 2); 1747 + } 1748 + 1749 + .user-header { 1750 + flex-direction: column; 1751 + } 1752 + } 1753 + </file> 1754 + 1755 + <file 
path="src/thicket/templates/timeline.html">
1756 + {% extends "base.html" %}
1757 + 
1758 + {% block page_title %}Timeline - {{ title }}{% endblock %}
1759 + 
1760 + {% block content %}
1761 + {% set seen_users = [] %}
1762 + <div class="page-content">
1763 + <h2>Recent Posts &amp; Conversations</h2>
1764 + 
1765 + <section class="unified-timeline">
1766 + {% for item in timeline_items %}
1767 + {% if item.type == "post" %}
1768 + <!-- Individual Post -->
1769 + <article class="timeline-entry {% if item.content.references %}with-references{% endif %}">
1770 + <div class="timeline-meta">
1771 + <time datetime="{{ (item.content.entry.updated or item.content.entry.published).isoformat() }}" class="timeline-time">
1772 + {{ (item.content.entry.updated or item.content.entry.published).strftime('%Y-%m-%d %H:%M') }}
1773 + </time>
1774 + {% set homepage = get_user_homepage(item.content.username) %}
1775 + {% if item.content.username not in seen_users %}
1776 + <a id="{{ item.content.username }}" class="user-anchor"></a>
1777 + {% set _ = seen_users.append(item.content.username) %}
1778 + {% endif %}
1779 + <a id="post-{{ loop.index0 }}-{{ safe_anchor_id(item.content.entry.id) }}" class="post-anchor"></a>
1780 + {% if homepage %}
1781 + <a href="{{ homepage }}" target="_blank" class="timeline-author">{{ item.content.display_name }}</a>
1782 + {% else %}
1783 + <span class="timeline-author">{{ item.content.display_name }}</span>
1784 + {% endif %}
1785 + {% if item.content.references %}
1786 + <div class="reference-badges">
1787 + {% for ref in item.content.references %}
1788 + {% if ref.type == 'outbound' %}
1789 + <span class="ref-badge ref-outbound" title="References {{ ref.target_username or 'external post' }}">
1790 + → {{ ref.target_username or 'ext' }}
1791 + </span>
1792 + {% elif ref.type == 'inbound' %}
1793 + <span class="ref-badge ref-inbound" title="Referenced by {{ ref.source_username or 'external post' }}">
1794 + ← {{ ref.source_username or 'ext' }}
1795 + </span>
1796 + {%
endif %} 1797 + {% endfor %} 1798 + </div> 1799 + {% endif %} 1800 + </div> 1801 + <div class="timeline-content"> 1802 + <strong class="timeline-title"> 1803 + <a href="{{ item.content.entry.link }}" target="_blank">{{ item.content.entry.title }}</a> 1804 + </strong> 1805 + {% if item.content.entry.summary %} 1806 + <span class="timeline-summary">— {{ clean_html_summary(item.content.entry.summary, 250) }}</span> 1807 + {% endif %} 1808 + {% if item.content.shared_references %} 1809 + <span class="inline-shared-refs"> 1810 + {% for ref in item.content.shared_references[:3] %} 1811 + {% if ref.target_username %} 1812 + <a href="#{{ ref.target_username }}" class="shared-ref-link" title="Referenced by {{ ref.count }} entries">@{{ ref.target_username }}</a>{% if not loop.last %}, {% endif %} 1813 + {% endif %} 1814 + {% endfor %} 1815 + {% if item.content.shared_references|length > 3 %} 1816 + <span class="shared-ref-more">+{{ item.content.shared_references|length - 3 }} more</span> 1817 + {% endif %} 1818 + </span> 1819 + {% endif %} 1820 + {% if item.content.cross_thread_links %} 1821 + <div class="cross-thread-links"> 1822 + <span class="cross-thread-indicator">🔗 Also appears: </span> 1823 + {% for link in item.content.cross_thread_links %} 1824 + <a href="#{{ link.anchor_id }}" class="cross-thread-link" title="{{ link.title }}">{{ link.context }}</a>{% if not loop.last %}, {% endif %} 1825 + {% endfor %} 1826 + </div> 1827 + {% endif %} 1828 + </div> 1829 + </article> 1830 + 1831 + {% elif item.type == "thread" %} 1832 + <!-- Conversation Thread --> 1833 + {% set outer_loop_index = loop.index0 %} 1834 + {% for thread_item in item.content %} 1835 + <article class="timeline-entry conversation-post level-{{ thread_item.thread_level }}"> 1836 + <div class="timeline-meta"> 1837 + <time datetime="{{ thread_item.entry.updated or thread_item.entry.published }}" class="timeline-time"> 1838 + {{ (thread_item.entry.updated or thread_item.entry.published).strftime('%Y-%m-%d 
%H:%M') }} 1839 + </time> 1840 + {% set homepage = get_user_homepage(thread_item.username) %} 1841 + {% if thread_item.username not in seen_users %} 1842 + <a id="{{ thread_item.username }}" class="user-anchor"></a> 1843 + {% set _ = seen_users.append(thread_item.username) %} 1844 + {% endif %} 1845 + <a id="post-{{ outer_loop_index }}-{{ loop.index0 }}-{{ safe_anchor_id(thread_item.entry.id) }}" class="post-anchor"></a> 1846 + {% if homepage %} 1847 + <a href="{{ homepage }}" target="_blank" class="timeline-author author-{{ thread_item.username }}">{{ thread_item.display_name }}</a> 1848 + {% else %} 1849 + <span class="timeline-author author-{{ thread_item.username }}">{{ thread_item.display_name }}</span> 1850 + {% endif %} 1851 + {% if thread_item.references_to or thread_item.referenced_by %} 1852 + <span class="reference-indicators"> 1853 + {% if thread_item.references_to %} 1854 + <span class="ref-out" title="References other posts">→</span> 1855 + {% endif %} 1856 + {% if thread_item.referenced_by %} 1857 + <span class="ref-in" title="Referenced by other posts">←</span> 1858 + {% endif %} 1859 + </span> 1860 + {% endif %} 1861 + </div> 1862 + <div class="timeline-content"> 1863 + <strong class="timeline-title"> 1864 + <a href="{{ thread_item.entry.link }}" target="_blank">{{ thread_item.entry.title }}</a> 1865 + </strong> 1866 + {% if thread_item.entry.summary %} 1867 + <span class="timeline-summary">— {{ clean_html_summary(thread_item.entry.summary, 300) }}</span> 1868 + {% endif %} 1869 + {% if thread_item.shared_references %} 1870 + <span class="inline-shared-refs"> 1871 + {% for ref in thread_item.shared_references[:3] %} 1872 + {% if ref.target_username %} 1873 + <a href="#{{ ref.target_username }}" class="shared-ref-link" title="Referenced by {{ ref.count }} entries">@{{ ref.target_username }}</a>{% if not loop.last %}, {% endif %} 1874 + {% endif %} 1875 + {% endfor %} 1876 + {% if thread_item.shared_references|length > 3 %} 1877 + <span 
class="shared-ref-more">+{{ thread_item.shared_references|length - 3 }} more</span> 1878 + {% endif %} 1879 + </span> 1880 + {% endif %} 1881 + {% if thread_item.cross_thread_links %} 1882 + <div class="cross-thread-links"> 1883 + <span class="cross-thread-indicator">🔗 Also appears: </span> 1884 + {% for link in thread_item.cross_thread_links %} 1885 + <a href="#{{ link.anchor_id }}" class="cross-thread-link" title="{{ link.title }}">{{ link.context }}</a>{% if not loop.last %}, {% endif %} 1886 + {% endfor %} 1887 + </div> 1888 + {% endif %} 1889 + </div> 1890 + </article> 1891 + {% endfor %} 1892 + {% endif %} 1893 + {% endfor %} 1894 + </section> 1895 + </div> 1896 + {% endblock %} 1897 + </file> 1898 + 1899 + <file path="src/thicket/templates/users.html"> 1900 + {% extends "base.html" %} 1901 + 1902 + {% block page_title %}Users - {{ title }}{% endblock %} 1903 + 1904 + {% block content %} 1905 + <div class="page-content"> 1906 + <h2>Users</h2> 1907 + <p class="page-description">All users contributing to this thicket, ordered by post count.</p> 1908 + 1909 + {% for user_info in users %} 1910 + <article class="user-card"> 1911 + <div class="user-header"> 1912 + {% if user_info.metadata.icon and user_info.metadata.icon != "None" %} 1913 + <img src="{{ user_info.metadata.icon }}" alt="{{ user_info.metadata.username }}" class="user-icon"> 1914 + {% endif %} 1915 + <div class="user-info"> 1916 + <h3> 1917 + {% if user_info.metadata.display_name %} 1918 + {{ user_info.metadata.display_name }} 1919 + <span class="username">({{ user_info.metadata.username }})</span> 1920 + {% else %} 1921 + {{ user_info.metadata.username }} 1922 + {% endif %} 1923 + </h3> 1924 + <div class="user-meta"> 1925 + {% if user_info.metadata.homepage %} 1926 + <a href="{{ user_info.metadata.homepage }}" target="_blank">{{ user_info.metadata.homepage }}</a> 1927 + {% endif %} 1928 + {% if user_info.metadata.email %} 1929 + <span class="separator">•</span> 1930 + <a href="mailto:{{ 
user_info.metadata.email }}">{{ user_info.metadata.email }}</a> 1931 + {% endif %} 1932 + <span class="separator">•</span> 1933 + <span class="post-count">{{ user_info.metadata.entry_count }} posts</span> 1934 + </div> 1935 + </div> 1936 + </div> 1937 + 1938 + {% if user_info.recent_entries %} 1939 + <div class="user-recent"> 1940 + <h4>Recent posts:</h4> 1941 + <ul> 1942 + {% for display_name, entry in user_info.recent_entries %} 1943 + <li> 1944 + <a href="{{ entry.link }}" target="_blank">{{ entry.title }}</a> 1945 + <time datetime="{{ entry.updated or entry.published }}"> 1946 + ({{ (entry.updated or entry.published).strftime('%Y-%m-%d') }}) 1947 + </time> 1948 + </li> 1949 + {% endfor %} 1950 + </ul> 1951 + </div> 1952 + {% endif %} 1953 + </article> 1954 + {% endfor %} 1955 + </div> 1956 + {% endblock %} 1957 + </file> 1958 + 1959 + <file path="README.md"> 1960 + # Thicket 1961 + 1962 + A modern CLI tool for persisting Atom/RSS feeds in Git repositories, designed to enable distributed weblog comment structures. 1963 + 1964 + ## Features 1965 + 1966 + - **Feed Auto-Discovery**: Automatically extracts user metadata from Atom/RSS feeds 1967 + - **Git Storage**: Stores feed entries in a Git repository with full history 1968 + - **Duplicate Management**: Manual curation of duplicate entries across feeds 1969 + - **Modern CLI**: Built with Typer and Rich for beautiful terminal output 1970 + - **Comprehensive Parsing**: Supports RSS 0.9x, RSS 1.0, RSS 2.0, and Atom feeds 1971 + - **Cron-Friendly**: Designed for scheduled execution 1972 + 1973 + ## Installation 1974 + 1975 + ```bash 1976 + # Install from source 1977 + pip install -e . 1978 + 1979 + # Or install with dev dependencies 1980 + pip install -e .[dev] 1981 + ``` 1982 + 1983 + ## Quick Start 1984 + 1985 + 1. **Initialize a new thicket repository:** 1986 + ```bash 1987 + thicket init ./my-feeds 1988 + ``` 1989 + 1990 + 2.
**Add a user with their feed:** 1991 + ```bash 1992 + thicket add user "alice" --feed "https://alice.example.com/feed.xml" 1993 + ``` 1994 + 1995 + 3. **Sync feeds to download entries:** 1996 + ```bash 1997 + thicket sync --all 1998 + ``` 1999 + 2000 + 4. **List users and feeds:** 2001 + ```bash 2002 + thicket list users 2003 + thicket list feeds 2004 + thicket list entries 2005 + ``` 2006 + 2007 + ## Commands 2008 + 2009 + ### Initialize 2010 + ```bash 2011 + thicket init <git-store-path> [--cache-dir <path>] [--config <config-file>] 2012 + ``` 2013 + 2014 + ### Add Users and Feeds 2015 + ```bash 2016 + # Add user with auto-discovery 2017 + thicket add user "username" --feed "https://example.com/feed.xml" 2018 + 2019 + # Add user with manual metadata 2020 + thicket add user "username" \ 2021 + --feed "https://example.com/feed.xml" \ 2022 + --email "user@example.com" \ 2023 + --homepage "https://example.com" \ 2024 + --display-name "User Name" 2025 + 2026 + # Add additional feed to existing user 2027 + thicket add feed "username" "https://example.com/other-feed.xml" 2028 + ``` 2029 + 2030 + ### Sync Feeds 2031 + ```bash 2032 + # Sync all users 2033 + thicket sync --all 2034 + 2035 + # Sync specific user 2036 + thicket sync --user "username" 2037 + 2038 + # Dry run (preview changes) 2039 + thicket sync --all --dry-run 2040 + ``` 2041 + 2042 + ### List Information 2043 + ```bash 2044 + # List all users 2045 + thicket list users 2046 + 2047 + # List all feeds 2048 + thicket list feeds 2049 + 2050 + # List feeds for specific user 2051 + thicket list feeds --user "username" 2052 + 2053 + # List recent entries 2054 + thicket list entries --limit 20 2055 + 2056 + # List entries for specific user 2057 + thicket list entries --user "username" 2058 + ``` 2059 + 2060 + ### Manage Duplicates 2061 + ```bash 2062 + # List duplicate mappings 2063 + thicket duplicates list 2064 + 2065 + # Mark entries as duplicates 2066 + thicket duplicates add "https://example.com/dup" 
"https://example.com/canonical" 2067 + 2068 + # Remove duplicate mapping 2069 + thicket duplicates remove "https://example.com/dup" 2070 + ``` 2071 + 2072 + ## Configuration 2073 + 2074 + Thicket uses a YAML configuration file (default: `thicket.yaml`): 2075 + 2076 + ```yaml 2077 + git_store: ./feeds-repo 2078 + cache_dir: ~/.cache/thicket 2079 + users: 2080 + - username: alice 2081 + feeds: 2082 + - https://alice.example.com/feed.xml 2083 + email: alice@example.com 2084 + homepage: https://alice.example.com 2085 + display_name: Alice 2086 + ``` 2087 + 2088 + ## Git Repository Structure 2089 + 2090 + ``` 2091 + feeds-repo/ 2092 + ├── index.json # User directory index 2093 + ├── duplicates.json # Duplicate entry mappings 2094 + ├── alice/ 2095 + │ ├── metadata.json # User metadata 2096 + │ ├── entry_id_1.json # Feed entries 2097 + │ └── entry_id_2.json 2098 + └── bob/ 2099 + └── ... 2100 + ``` 2101 + 2102 + ## Development 2103 + 2104 + ### Setup 2105 + ```bash 2106 + # Install in development mode 2107 + pip install -e .[dev] 2108 + 2109 + # Run tests 2110 + pytest 2111 + 2112 + # Run linting 2113 + ruff check src/ 2114 + black --check src/ 2115 + 2116 + # Run type checking 2117 + mypy src/ 2118 + ``` 2119 + 2120 + ### Architecture 2121 + 2122 + - **CLI**: Modern interface with Typer and Rich 2123 + - **Feed Processing**: Universal parsing with feedparser 2124 + - **Git Storage**: Structured storage with GitPython 2125 + - **Data Models**: Pydantic for validation and serialization 2126 + - **Async HTTP**: httpx for efficient feed fetching 2127 + 2128 + ## Use Cases 2129 + 2130 + - **Blog Aggregation**: Collect and archive blog posts from multiple sources 2131 + - **Comment Networks**: Enable distributed commenting systems 2132 + - **Feed Archival**: Preserve feed history beyond typical feed depth limits 2133 + - **Content Curation**: Manage and deduplicate content across feeds 2134 + 2135 + ## License 2136 + 2137 + MIT License - see LICENSE file for details. 
2138 + </file> 2139 + 2140 + <file path="src/thicket/cli/commands/index_cmd.py"> 2141 + """CLI command for building reference index from blog entries.""" 2142 + 2143 + import json 2144 + from pathlib import Path 2145 + from typing import Optional 2146 + 2147 + import typer 2148 + from rich.console import Console 2149 + from rich.progress import ( 2150 + BarColumn, 2151 + Progress, 2152 + SpinnerColumn, 2153 + TaskProgressColumn, 2154 + TextColumn, 2155 + ) 2156 + from rich.table import Table 2157 + 2158 + from ...core.git_store import GitStore 2159 + from ...core.reference_parser import ReferenceIndex, ReferenceParser 2160 + from ..main import app 2161 + from ..utils import get_tsv_mode, load_config 2162 + 2163 + console = Console() 2164 + 2165 + 2166 + @app.command() 2167 + def index( 2168 + config_file: Optional[Path] = typer.Option( 2169 + None, 2170 + "--config", 2171 + "-c", 2172 + help="Path to configuration file", 2173 + ), 2174 + output_file: Optional[Path] = typer.Option( 2175 + None, 2176 + "--output", 2177 + "-o", 2178 + help="Path to output index file (default: updates links.json in git store)", 2179 + ), 2180 + verbose: bool = typer.Option( 2181 + False, 2182 + "--verbose", 2183 + "-v", 2184 + help="Show detailed progress information", 2185 + ), 2186 + ) -> None: 2187 + """Build a reference index showing which blog entries reference others. 2188 + 2189 + This command analyzes all blog entries to detect cross-references between 2190 + different blogs, creating an index that can be used to build threaded 2191 + views of related content. 2192 + 2193 + Updates the unified links.json file with reference data. 
2194 + """ 2195 + try: 2196 + # Load configuration 2197 + config = load_config(config_file) 2198 + 2199 + # Initialize Git store 2200 + git_store = GitStore(config.git_store) 2201 + 2202 + # Initialize reference parser 2203 + parser = ReferenceParser() 2204 + 2205 + # Build user domain mapping 2206 + if verbose: 2207 + console.print("Building user domain mapping...") 2208 + user_domains = parser.build_user_domain_mapping(git_store) 2209 + 2210 + if verbose: 2211 + console.print(f"Found {len(user_domains)} users with {sum(len(d) for d in user_domains.values())} total domains") 2212 + 2213 + # Initialize reference index 2214 + ref_index = ReferenceIndex() 2215 + ref_index.user_domains = user_domains 2216 + 2217 + # Get all users 2218 + index = git_store._load_index() 2219 + users = list(index.users.keys()) 2220 + 2221 + if not users: 2222 + console.print("[yellow]No users found in Git store[/yellow]") 2223 + raise typer.Exit(0) 2224 + 2225 + # Process all entries 2226 + total_entries = 0 2227 + total_references = 0 2228 + all_references = [] 2229 + 2230 + with Progress( 2231 + SpinnerColumn(), 2232 + TextColumn("[progress.description]{task.description}"), 2233 + BarColumn(), 2234 + TaskProgressColumn(), 2235 + console=console, 2236 + ) as progress: 2237 + 2238 + # Count total entries first 2239 + counting_task = progress.add_task("Counting entries...", total=len(users)) 2240 + entry_counts = {} 2241 + for username in users: 2242 + entries = git_store.list_entries(username) 2243 + entry_counts[username] = len(entries) 2244 + total_entries += len(entries) 2245 + progress.advance(counting_task) 2246 + 2247 + progress.remove_task(counting_task) 2248 + 2249 + # Process entries - extract references 2250 + processing_task = progress.add_task( 2251 + f"Extracting references from {total_entries} entries...", 2252 + total=total_entries 2253 + ) 2254 + 2255 + for username in users: 2256 + entries = git_store.list_entries(username) 2257 + 2258 + for entry in entries: 2259 + # 
Extract references from this entry 2260 + references = parser.extract_references(entry, username, user_domains) 2261 + all_references.extend(references) 2262 + 2263 + progress.advance(processing_task) 2264 + 2265 + if verbose and references: 2266 + console.print(f" Found {len(references)} references in {username}:{entry.title[:50]}...") 2267 + 2268 + progress.remove_task(processing_task) 2269 + 2270 + # Resolve target_entry_ids for references 2271 + if all_references: 2272 + resolve_task = progress.add_task( 2273 + f"Resolving {len(all_references)} references...", 2274 + total=len(all_references) 2275 + ) 2276 + 2277 + if verbose: 2278 + console.print(f"Resolving target entry IDs for {len(all_references)} references...") 2279 + 2280 + resolved_references = parser.resolve_target_entry_ids(all_references, git_store) 2281 + 2282 + # Count resolved references 2283 + resolved_count = sum(1 for ref in resolved_references if ref.target_entry_id is not None) 2284 + if verbose: 2285 + console.print(f"Resolved {resolved_count} out of {len(all_references)} references") 2286 + 2287 + # Add resolved references to index 2288 + for ref in resolved_references: 2289 + ref_index.add_reference(ref) 2290 + total_references += 1 2291 + progress.advance(resolve_task) 2292 + 2293 + progress.remove_task(resolve_task) 2294 + 2295 + # Determine output path 2296 + if output_file: 2297 + output_path = output_file 2298 + else: 2299 + output_path = config.git_store / "links.json" 2300 + 2301 + # Load existing links data or create new structure 2302 + if output_path.exists() and not output_file: 2303 + # Load existing unified structure 2304 + with open(output_path) as f: 2305 + existing_data = json.load(f) 2306 + else: 2307 + # Create new structure 2308 + existing_data = { 2309 + "links": {}, 2310 + "reverse_mapping": {}, 2311 + "user_domains": {} 2312 + } 2313 + 2314 + # Update with reference data 2315 + existing_data["references"] = ref_index.to_dict()["references"] 2316 + 
existing_data["user_domains"] = {k: list(v) for k, v in user_domains.items()} 2317 + 2318 + # Save updated structure 2319 + with open(output_path, "w") as f: 2320 + json.dump(existing_data, f, indent=2, default=str) 2321 + 2322 + # Show summary 2323 + if not get_tsv_mode(): 2324 + console.print("\n[green]✓ Reference index built successfully[/green]") 2325 + 2326 + # Create summary table or TSV output 2327 + if get_tsv_mode(): 2328 + print("Metric\tCount") 2329 + print(f"Total Users\t{len(users)}") 2330 + print(f"Total Entries\t{total_entries}") 2331 + print(f"Total References\t{total_references}") 2332 + print(f"Outbound Refs\t{len(ref_index.outbound_refs)}") 2333 + print(f"Inbound Refs\t{len(ref_index.inbound_refs)}") 2334 + print(f"Output File\t{output_path}") 2335 + else: 2336 + table = Table(title="Reference Index Summary") 2337 + table.add_column("Metric", style="cyan") 2338 + table.add_column("Count", style="green") 2339 + 2340 + table.add_row("Total Users", str(len(users))) 2341 + table.add_row("Total Entries", str(total_entries)) 2342 + table.add_row("Total References", str(total_references)) 2343 + table.add_row("Outbound Refs", str(len(ref_index.outbound_refs))) 2344 + table.add_row("Inbound Refs", str(len(ref_index.inbound_refs))) 2345 + table.add_row("Output File", str(output_path)) 2346 + 2347 + console.print(table) 2348 + 2349 + # Show some interesting statistics 2350 + if total_references > 0: 2351 + if not get_tsv_mode(): 2352 + console.print("\n[bold]Reference Statistics:[/bold]") 2353 + 2354 + # Most referenced users 2355 + target_counts = {} 2356 + unresolved_domains = set() 2357 + 2358 + for ref in ref_index.references: 2359 + if ref.target_username: 2360 + target_counts[ref.target_username] = target_counts.get(ref.target_username, 0) + 1 2361 + else: 2362 + # Track unresolved domains 2363 + from urllib.parse import urlparse 2364 + domain = urlparse(ref.target_url).netloc.lower() 2365 + unresolved_domains.add(domain) 2366 + 2367 + if 
target_counts: 2368 + if get_tsv_mode(): 2369 + print("Referenced User\tReference Count") 2370 + for username, count in sorted(target_counts.items(), key=lambda x: x[1], reverse=True)[:5]: 2371 + print(f"{username}\t{count}") 2372 + else: 2373 + console.print("\nMost referenced users:") 2374 + for username, count in sorted(target_counts.items(), key=lambda x: x[1], reverse=True)[:5]: 2375 + console.print(f" {username}: {count} references") 2376 + 2377 + if unresolved_domains and verbose: 2378 + if get_tsv_mode(): 2379 + print("Unresolved Domain\tCount") 2380 + for domain in sorted(unresolved_domains)[:10]: 2381 + print(f"{domain}\t1") 2382 + if len(unresolved_domains) > 10: 2383 + print(f"... and {len(unresolved_domains) - 10} more\t...") 2384 + else: 2385 + console.print(f"\nUnresolved domains: {len(unresolved_domains)}") 2386 + for domain in sorted(unresolved_domains)[:10]: 2387 + console.print(f" {domain}") 2388 + if len(unresolved_domains) > 10: 2389 + console.print(f" ... 
and {len(unresolved_domains) - 10} more") 2390 + 2391 + except Exception as e: 2392 + console.print(f"[red]Error building reference index: {e}[/red]") 2393 + if verbose: 2394 + console.print_exception() 2395 + raise typer.Exit(1) 2396 + 2397 + 2398 + @app.command() 2399 + def threads( 2400 + config_file: Optional[Path] = typer.Option( 2401 + None, 2402 + "--config", 2403 + "-c", 2404 + help="Path to configuration file", 2405 + ), 2406 + index_file: Optional[Path] = typer.Option( 2407 + None, 2408 + "--index", 2409 + "-i", 2410 + help="Path to reference index file (default: links.json in git store)", 2411 + ), 2412 + username: Optional[str] = typer.Option( 2413 + None, 2414 + "--username", 2415 + "-u", 2416 + help="Show threads for specific username only", 2417 + ), 2418 + entry_id: Optional[str] = typer.Option( 2419 + None, 2420 + "--entry", 2421 + "-e", 2422 + help="Show thread for specific entry ID", 2423 + ), 2424 + min_size: int = typer.Option( 2425 + 2, 2426 + "--min-size", 2427 + "-m", 2428 + help="Minimum thread size to display", 2429 + ), 2430 + ) -> None: 2431 + """Show threaded view of related blog entries. 2432 + 2433 + This command uses the reference index to show which blog entries 2434 + are connected through cross-references, creating an email-style 2435 + threaded view of the conversation. 2436 + 2437 + Reads reference data from the unified links.json file. 
2438 + """ 2439 + try: 2440 + # Load configuration 2441 + config = load_config(config_file) 2442 + 2443 + # Determine index file path 2444 + if index_file: 2445 + index_path = index_file 2446 + else: 2447 + index_path = config.git_store / "links.json" 2448 + 2449 + if not index_path.exists(): 2450 + console.print(f"[red]Links file not found: {index_path}[/red]") 2451 + console.print("Run 'thicket links' and 'thicket index' first to build the reference index") 2452 + raise typer.Exit(1) 2453 + 2454 + # Load unified data 2455 + with open(index_path) as f: 2456 + unified_data = json.load(f) 2457 + 2458 + # Check if references exist in the unified structure 2459 + if "references" not in unified_data: 2460 + console.print(f"[red]No references found in {index_path}[/red]") 2461 + console.print("Run 'thicket index' first to build the reference index") 2462 + raise typer.Exit(1) 2463 + 2464 + # Extract reference data and reconstruct ReferenceIndex 2465 + ref_index = ReferenceIndex.from_dict({ 2466 + "references": unified_data["references"], 2467 + "user_domains": unified_data.get("user_domains", {}) 2468 + }) 2469 + 2470 + # Initialize Git store to get entry details 2471 + git_store = GitStore(config.git_store) 2472 + 2473 + if entry_id and username: 2474 + # Show specific thread 2475 + thread_members = ref_index.get_thread_members(username, entry_id) 2476 + _display_thread(thread_members, ref_index, git_store, f"Thread for {username}:{entry_id}") 2477 + 2478 + elif username: 2479 + # Show all threads involving this user 2480 + user_index = git_store._load_index() 2481 + user = user_index.get_user(username) 2482 + if not user: 2483 + console.print(f"[red]User not found: {username}[/red]") 2484 + raise typer.Exit(1) 2485 + 2486 + entries = git_store.list_entries(username) 2487 + threads_found = set() 2488 + 2489 + console.print(f"[bold]Threads involving {username}:[/bold]\n") 2490 + 2491 + for entry in entries: 2492 + thread_members = ref_index.get_thread_members(username, 
entry.id) 2493 + if len(thread_members) >= min_size: 2494 + thread_key = tuple(sorted(thread_members)) 2495 + if thread_key not in threads_found: 2496 + threads_found.add(thread_key) 2497 + _display_thread(thread_members, ref_index, git_store, f"Thread #{len(threads_found)}") 2498 + 2499 + else: 2500 + # Show all threads 2501 + console.print("[bold]All conversation threads:[/bold]\n") 2502 + 2503 + all_threads = set() 2504 + processed_entries = set() 2505 + 2506 + # Get all entries 2507 + user_index = git_store._load_index() 2508 + for username in user_index.users.keys(): 2509 + entries = git_store.list_entries(username) 2510 + for entry in entries: 2511 + entry_key = (username, entry.id) 2512 + if entry_key in processed_entries: 2513 + continue 2514 + 2515 + thread_members = ref_index.get_thread_members(username, entry.id) 2516 + if len(thread_members) >= min_size: 2517 + thread_key = tuple(sorted(thread_members)) 2518 + if thread_key not in all_threads: 2519 + all_threads.add(thread_key) 2520 + _display_thread(thread_members, ref_index, git_store, f"Thread #{len(all_threads)}") 2521 + 2522 + # Mark all members as processed 2523 + for member in thread_members: 2524 + processed_entries.add(member) 2525 + 2526 + if not all_threads: 2527 + console.print("[yellow]No conversation threads found[/yellow]") 2528 + console.print(f"(minimum thread size: {min_size})") 2529 + 2530 + except Exception as e: 2531 + console.print(f"[red]Error showing threads: {e}[/red]") 2532 + raise typer.Exit(1) 2533 + 2534 + 2535 + def _display_thread(thread_members, ref_index, git_store, title): 2536 + """Display a single conversation thread.""" 2537 + console.print(f"[bold cyan]{title}[/bold cyan]") 2538 + console.print(f"Thread size: {len(thread_members)} entries") 2539 + 2540 + # Get entry details for each member 2541 + thread_entries = [] 2542 + for username, entry_id in thread_members: 2543 + entry = git_store.get_entry(username, entry_id) 2544 + if entry: 2545 + 
thread_entries.append((username, entry)) 2546 + 2547 + # Sort by publication date 2548 + thread_entries.sort(key=lambda x: x[1].published or x[1].updated) 2549 + 2550 + # Display entries 2551 + for i, (username, entry) in enumerate(thread_entries): 2552 + prefix = "├─" if i < len(thread_entries) - 1 else "└─" 2553 + 2554 + # Get references for this entry 2555 + outbound = ref_index.get_outbound_refs(username, entry.id) 2556 + inbound = ref_index.get_inbound_refs(username, entry.id) 2557 + 2558 + ref_info = "" 2559 + if outbound or inbound: 2560 + ref_info = f" ({len(outbound)} out, {len(inbound)} in)" 2561 + 2562 + console.print(f" {prefix} [{username}] {entry.title[:60]}...{ref_info}") 2563 + 2564 + if entry.published: 2565 + console.print(f" Published: {entry.published.strftime('%Y-%m-%d')}") 2566 + 2567 + console.print() # Empty line after each thread 2568 + </file> 2569 + 2570 + <file path="src/thicket/cli/commands/info_cmd.py"> 2571 + """CLI command for displaying detailed information about a specific atom entry.""" 2572 + 2573 + import json 2574 + from pathlib import Path 2575 + from typing import Optional 2576 + 2577 + import typer 2578 + from rich.console import Console 2579 + from rich.panel import Panel 2580 + from rich.table import Table 2581 + from rich.text import Text 2582 + 2583 + from ...core.git_store import GitStore 2584 + from ...core.reference_parser import ReferenceIndex 2585 + from ..main import app 2586 + from ..utils import load_config, get_tsv_mode 2587 + 2588 + console = Console() 2589 + 2590 + 2591 + @app.command() 2592 + def info( 2593 + identifier: str = typer.Argument( 2594 + ..., 2595 + help="The atom ID or URL of the entry to display information about" 2596 + ), 2597 + username: Optional[str] = typer.Option( 2598 + None, 2599 + "--username", 2600 + "-u", 2601 + help="Username to search for the entry (if not provided, searches all users)" 2602 + ), 2603 + config_file: Optional[Path] = typer.Option( 2604 + Path("thicket.yaml"), 2605 + 
"--config", 2606 + "-c", 2607 + help="Path to configuration file", 2608 + ), 2609 + show_content: bool = typer.Option( 2610 + False, 2611 + "--content", 2612 + help="Include the full content of the entry in the output" 2613 + ), 2614 + ) -> None: 2615 + """Display detailed information about a specific atom entry. 2616 + 2617 + You can specify the entry using either its atom ID or URL. 2618 + Shows all metadata for the given entry, including title, dates, categories, 2619 + and summarizes all inbound and outbound links to/from other posts. 2620 + """ 2621 + try: 2622 + # Load configuration 2623 + config = load_config(config_file) 2624 + 2625 + # Initialize Git store 2626 + git_store = GitStore(config.git_store) 2627 + 2628 + # Find the entry 2629 + entry = None 2630 + found_username = None 2631 + 2632 + # Check if identifier looks like a URL 2633 + is_url = identifier.startswith(('http://', 'https://')) 2634 + 2635 + if username: 2636 + # Search specific username 2637 + if is_url: 2638 + # Search by URL 2639 + entries = git_store.list_entries(username) 2640 + for e in entries: 2641 + if str(e.link) == identifier: 2642 + entry = e 2643 + found_username = username 2644 + break 2645 + else: 2646 + # Search by atom ID 2647 + entry = git_store.get_entry(username, identifier) 2648 + if entry: 2649 + found_username = username 2650 + else: 2651 + # Search all users 2652 + index = git_store._load_index() 2653 + for user in index.users.keys(): 2654 + if is_url: 2655 + # Search by URL 2656 + entries = git_store.list_entries(user) 2657 + for e in entries: 2658 + if str(e.link) == identifier: 2659 + entry = e 2660 + found_username = user 2661 + break 2662 + if entry: 2663 + break 2664 + else: 2665 + # Search by atom ID 2666 + entry = git_store.get_entry(user, identifier) 2667 + if entry: 2668 + found_username = user 2669 + break 2670 + 2671 + if not entry or not found_username: 2672 + if username: 2673 + console.print(f"[red]Entry with {'URL' if is_url else 'atom ID'} 
'{identifier}' not found for user '{username}'[/red]") 2674 + else: 2675 + console.print(f"[red]Entry with {'URL' if is_url else 'atom ID'} '{identifier}' not found in any user's entries[/red]") 2676 + raise typer.Exit(1) 2677 + 2678 + # Load reference index if available 2679 + links_path = config.git_store / "links.json" 2680 + ref_index = None 2681 + if links_path.exists(): 2682 + with open(links_path) as f: 2683 + unified_data = json.load(f) 2684 + 2685 + # Check if references exist in the unified structure 2686 + if "references" in unified_data: 2687 + ref_index = ReferenceIndex.from_dict({ 2688 + "references": unified_data["references"], 2689 + "user_domains": unified_data.get("user_domains", {}) 2690 + }) 2691 + 2692 + # Display information 2693 + if get_tsv_mode(): 2694 + _display_entry_info_tsv(entry, found_username, ref_index, show_content) 2695 + else: 2696 + _display_entry_info(entry, found_username) 2697 + 2698 + if ref_index: 2699 + _display_link_info(entry, found_username, ref_index) 2700 + else: 2701 + console.print("\n[yellow]No reference index found. 
Run 'thicket links' and 'thicket index' to build cross-reference data.[/yellow]") 2702 + 2703 + # Optionally display content 2704 + if show_content and entry.content: 2705 + _display_content(entry.content) 2706 + 2707 + except Exception as e: 2708 + console.print(f"[red]Error displaying entry info: {e}[/red]") 2709 + raise typer.Exit(1) 2710 + 2711 + 2712 + def _display_entry_info(entry, username: str) -> None: 2713 + """Display basic entry information in a structured format.""" 2714 + 2715 + # Create main info panel 2716 + info_table = Table.grid(padding=(0, 2)) 2717 + info_table.add_column("Field", style="cyan bold", width=15) 2718 + info_table.add_column("Value", style="white") 2719 + 2720 + info_table.add_row("User", f"[green]{username}[/green]") 2721 + info_table.add_row("Atom ID", f"[blue]{entry.id}[/blue]") 2722 + info_table.add_row("Title", entry.title) 2723 + info_table.add_row("Link", str(entry.link)) 2724 + 2725 + if entry.published: 2726 + info_table.add_row("Published", entry.published.strftime("%Y-%m-%d %H:%M:%S UTC")) 2727 + 2728 + info_table.add_row("Updated", entry.updated.strftime("%Y-%m-%d %H:%M:%S UTC")) 2729 + 2730 + if entry.summary: 2731 + # Truncate long summaries 2732 + summary = entry.summary[:200] + "..." 
if len(entry.summary) > 200 else entry.summary 2733 + info_table.add_row("Summary", summary) 2734 + 2735 + if entry.categories: 2736 + categories_text = ", ".join(entry.categories) 2737 + info_table.add_row("Categories", categories_text) 2738 + 2739 + if entry.author: 2740 + author_info = [] 2741 + if "name" in entry.author: 2742 + author_info.append(entry.author["name"]) 2743 + if "email" in entry.author: 2744 + author_info.append(f"<{entry.author['email']}>") 2745 + if author_info: 2746 + info_table.add_row("Author", " ".join(author_info)) 2747 + 2748 + if entry.content_type: 2749 + info_table.add_row("Content Type", entry.content_type) 2750 + 2751 + if entry.rights: 2752 + info_table.add_row("Rights", entry.rights) 2753 + 2754 + if entry.source: 2755 + info_table.add_row("Source Feed", entry.source) 2756 + 2757 + panel = Panel( 2758 + info_table, 2759 + title=f"[bold]Entry Information[/bold]", 2760 + border_style="blue" 2761 + ) 2762 + 2763 + console.print(panel) 2764 + 2765 + 2766 + def _display_link_info(entry, username: str, ref_index: ReferenceIndex) -> None: 2767 + """Display inbound and outbound link information.""" 2768 + 2769 + # Get links 2770 + outbound_refs = ref_index.get_outbound_refs(username, entry.id) 2771 + inbound_refs = ref_index.get_inbound_refs(username, entry.id) 2772 + 2773 + if not outbound_refs and not inbound_refs: 2774 + console.print("\n[dim]No cross-references found for this entry.[/dim]") 2775 + return 2776 + 2777 + # Create links table 2778 + links_table = Table(title="Cross-References") 2779 + links_table.add_column("Direction", style="cyan", width=10) 2780 + links_table.add_column("Target/Source", style="green", width=20) 2781 + links_table.add_column("URL", style="blue", width=50) 2782 + 2783 + # Add outbound references 2784 + for ref in outbound_refs: 2785 + target_info = f"{ref.target_username}:{ref.target_entry_id}" if ref.target_username and ref.target_entry_id else "External" 2786 + links_table.add_row("→ Out", target_info, 
ref.target_url) 2787 + 2788 + # Add inbound references 2789 + for ref in inbound_refs: 2790 + source_info = f"{ref.source_username}:{ref.source_entry_id}" 2791 + links_table.add_row("← In", source_info, ref.target_url) 2792 + 2793 + console.print() 2794 + console.print(links_table) 2795 + 2796 + # Summary 2797 + console.print(f"\n[bold]Summary:[/bold] {len(outbound_refs)} outbound, {len(inbound_refs)} inbound references") 2798 + 2799 + 2800 + def _display_content(content: str) -> None: 2801 + """Display the full content of the entry.""" 2802 + 2803 + # Truncate very long content 2804 + display_content = content 2805 + if len(content) > 5000: 2806 + display_content = content[:5000] + "\n\n[... content truncated ...]" 2807 + 2808 + panel = Panel( 2809 + display_content, 2810 + title="[bold]Entry Content[/bold]", 2811 + border_style="green", 2812 + expand=False 2813 + ) 2814 + 2815 + console.print() 2816 + console.print(panel) 2817 + 2818 + 2819 + def _display_entry_info_tsv(entry, username: str, ref_index: Optional[ReferenceIndex], show_content: bool) -> None: 2820 + """Display entry information in TSV format.""" 2821 + 2822 + # Basic info 2823 + print("Field\tValue") 2824 + print(f"User\t{username}") 2825 + print(f"Atom ID\t{entry.id}") 2826 + print(f"Title\t{entry.title.replace(chr(9), ' ').replace(chr(10), ' ').replace(chr(13), ' ')}") 2827 + print(f"Link\t{entry.link}") 2828 + 2829 + if entry.published: 2830 + print(f"Published\t{entry.published.strftime('%Y-%m-%d %H:%M:%S UTC')}") 2831 + 2832 + print(f"Updated\t{entry.updated.strftime('%Y-%m-%d %H:%M:%S UTC')}") 2833 + 2834 + if entry.summary: 2835 + # Escape tabs and newlines in summary 2836 + summary = entry.summary.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ') 2837 + print(f"Summary\t{summary}") 2838 + 2839 + if entry.categories: 2840 + print(f"Categories\t{', '.join(entry.categories)}") 2841 + 2842 + if entry.author: 2843 + author_info = [] 2844 + if "name" in entry.author: 2845 + 
author_info.append(entry.author["name"]) 2846 + if "email" in entry.author: 2847 + author_info.append(f"<{entry.author['email']}>") 2848 + if author_info: 2849 + print(f"Author\t{' '.join(author_info)}") 2850 + 2851 + if entry.content_type: 2852 + print(f"Content Type\t{entry.content_type}") 2853 + 2854 + if entry.rights: 2855 + print(f"Rights\t{entry.rights}") 2856 + 2857 + if entry.source: 2858 + print(f"Source Feed\t{entry.source}") 2859 + 2860 + # Add reference info if available 2861 + if ref_index: 2862 + outbound_refs = ref_index.get_outbound_refs(username, entry.id) 2863 + inbound_refs = ref_index.get_inbound_refs(username, entry.id) 2864 + 2865 + print(f"Outbound References\t{len(outbound_refs)}") 2866 + print(f"Inbound References\t{len(inbound_refs)}") 2867 + 2868 + # Show each reference 2869 + for ref in outbound_refs: 2870 + target_info = f"{ref.target_username}:{ref.target_entry_id}" if ref.target_username and ref.target_entry_id else "External" 2871 + print(f"Outbound Reference\t{target_info}\t{ref.target_url}") 2872 + 2873 + for ref in inbound_refs: 2874 + source_info = f"{ref.source_username}:{ref.source_entry_id}" 2875 + print(f"Inbound Reference\t{source_info}\t{ref.target_url}") 2876 + 2877 + # Show content if requested 2878 + if show_content and entry.content: 2879 + # Escape tabs and newlines in content 2880 + content = entry.content.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ') 2881 + print(f"Content\t{content}") 2882 + </file> 2883 + 2884 + <file path="src/thicket/cli/commands/init.py"> 2885 + """Initialize command for thicket.""" 2886 + 2887 + from pathlib import Path 2888 + from typing import Optional 2889 + 2890 + import typer 2891 + from pydantic import ValidationError 2892 + 2893 + from ...core.git_store import GitStore 2894 + from ...models import ThicketConfig 2895 + from ..main import app 2896 + from ..utils import print_error, print_success, save_config 2897 + 2898 + 2899 + @app.command() 2900 + def init( 2901 + git_store: 
Path = typer.Argument(..., help="Path to Git repository for storing feeds"), 2902 + cache_dir: Optional[Path] = typer.Option( 2903 + None, "--cache-dir", "-c", help="Cache directory (default: ~/.cache/thicket)" 2904 + ), 2905 + config_file: Optional[Path] = typer.Option( 2906 + None, "--config", help="Configuration file path (default: thicket.yaml)" 2907 + ), 2908 + force: bool = typer.Option( 2909 + False, "--force", "-f", help="Overwrite existing configuration" 2910 + ), 2911 + ) -> None: 2912 + """Initialize a new thicket configuration and Git store.""" 2913 + 2914 + # Set default paths 2915 + if cache_dir is None: 2916 + from platformdirs import user_cache_dir 2917 + cache_dir = Path(user_cache_dir("thicket")) 2918 + 2919 + if config_file is None: 2920 + config_file = Path("thicket.yaml") 2921 + 2922 + # Check if config already exists 2923 + if config_file.exists() and not force: 2924 + print_error(f"Configuration file already exists: {config_file}") 2925 + print_error("Use --force to overwrite") 2926 + raise typer.Exit(1) 2927 + 2928 + # Create cache directory 2929 + cache_dir.mkdir(parents=True, exist_ok=True) 2930 + 2931 + # Create Git store 2932 + try: 2933 + GitStore(git_store) 2934 + print_success(f"Initialized Git store at: {git_store}") 2935 + except Exception as e: 2936 + print_error(f"Failed to initialize Git store: {e}") 2937 + raise typer.Exit(1) from e 2938 + 2939 + # Create configuration 2940 + try: 2941 + config = ThicketConfig( 2942 + git_store=git_store, 2943 + cache_dir=cache_dir, 2944 + users=[] 2945 + ) 2946 + 2947 + save_config(config, config_file) 2948 + print_success(f"Created configuration file: {config_file}") 2949 + 2950 + except ValidationError as e: 2951 + print_error(f"Invalid configuration: {e}") 2952 + raise typer.Exit(1) from e 2953 + except Exception as e: 2954 + print_error(f"Failed to create configuration: {e}") 2955 + raise typer.Exit(1) from e 2956 + 2957 + print_success("Thicket initialized successfully!") 2958 + 
print_success(f"Git store: {git_store}") 2959 + print_success(f"Cache directory: {cache_dir}") 2960 + print_success(f"Configuration: {config_file}") 2961 + print_success("Run 'thicket add user' to add your first user and feed.") 2962 + </file> 2963 + 2964 + <file path="src/thicket/cli/__init__.py"> 2965 + """CLI interface for thicket.""" 2966 + 2967 + from .main import app 2968 + 2969 + __all__ = ["app"] 2970 + </file> 2971 + 2972 + <file path="src/thicket/core/__init__.py"> 2973 + """Core business logic for thicket.""" 2974 + 2975 + from .feed_parser import FeedParser 2976 + from .git_store import GitStore 2977 + 2978 + __all__ = ["FeedParser", "GitStore"] 2979 + </file> 2980 + 2981 + <file path="src/thicket/core/feed_parser.py"> 2982 + """Feed parsing and normalization with auto-discovery.""" 2983 + 2984 + from datetime import datetime 2985 + from typing import Optional 2986 + from urllib.parse import urlparse 2987 + 2988 + import bleach 2989 + import feedparser 2990 + import httpx 2991 + from pydantic import HttpUrl, ValidationError 2992 + 2993 + from ..models import AtomEntry, FeedMetadata 2994 + 2995 + 2996 + class FeedParser: 2997 + """Parser for RSS/Atom feeds with normalization and auto-discovery.""" 2998 + 2999 + def __init__(self, user_agent: str = "thicket/0.1.0"): 3000 + """Initialize the feed parser.""" 3001 + self.user_agent = user_agent 3002 + self.allowed_tags = [ 3003 + "a", "abbr", "acronym", "b", "blockquote", "br", "code", "em", 3004 + "i", "li", "ol", "p", "pre", "strong", "ul", "h1", "h2", "h3", 3005 + "h4", "h5", "h6", "img", "div", "span", 3006 + ] 3007 + self.allowed_attributes = { 3008 + "a": ["href", "title"], 3009 + "abbr": ["title"], 3010 + "acronym": ["title"], 3011 + "img": ["src", "alt", "title", "width", "height"], 3012 + "blockquote": ["cite"], 3013 + } 3014 + 3015 + async def fetch_feed(self, url: HttpUrl) -> str: 3016 + """Fetch feed content from URL.""" 3017 + async with httpx.AsyncClient() as client: 3018 + response = await 
client.get( 3019 + str(url), 3020 + headers={"User-Agent": self.user_agent}, 3021 + timeout=30.0, 3022 + follow_redirects=True, 3023 + ) 3024 + response.raise_for_status() 3025 + return response.text 3026 + 3027 + def parse_feed(self, content: str, source_url: Optional[HttpUrl] = None) -> tuple[FeedMetadata, list[AtomEntry]]: 3028 + """Parse feed content and return metadata and entries.""" 3029 + parsed = feedparser.parse(content) 3030 + 3031 + if parsed.bozo and parsed.bozo_exception: 3032 + # Try to continue with potentially malformed feed 3033 + pass 3034 + 3035 + # Extract feed metadata 3036 + feed_meta = self._extract_feed_metadata(parsed.feed) 3037 + 3038 + # Extract and normalize entries 3039 + entries = [] 3040 + for entry in parsed.entries: 3041 + try: 3042 + atom_entry = self._normalize_entry(entry, source_url) 3043 + entries.append(atom_entry) 3044 + except Exception as e: 3045 + # Log error but continue processing other entries 3046 + print(f"Error processing entry {getattr(entry, 'id', 'unknown')}: {e}") 3047 + continue 3048 + 3049 + return feed_meta, entries 3050 + 3051 + def _extract_feed_metadata(self, feed: feedparser.FeedParserDict) -> FeedMetadata: 3052 + """Extract metadata from feed for auto-discovery.""" 3053 + # Parse author information 3054 + author_name = None 3055 + author_email = None 3056 + author_uri = None 3057 + 3058 + if hasattr(feed, 'author_detail'): 3059 + author_name = feed.author_detail.get('name') 3060 + author_email = feed.author_detail.get('email') 3061 + author_uri = feed.author_detail.get('href') 3062 + elif hasattr(feed, 'author'): 3063 + author_name = feed.author 3064 + 3065 + # Parse managing editor for RSS feeds 3066 + if not author_email and hasattr(feed, 'managingEditor'): 3067 + author_email = feed.managingEditor 3068 + 3069 + # Parse feed link 3070 + feed_link = None 3071 + if hasattr(feed, 'link'): 3072 + try: 3073 + feed_link = HttpUrl(feed.link) 3074 + except ValidationError: 3075 + pass 3076 + 3077 + # Parse 
image/icon/logo 3078 + logo = None 3079 + icon = None 3080 + image_url = None 3081 + 3082 + if hasattr(feed, 'image'): 3083 + try: 3084 + image_url = HttpUrl(feed.image.get('href', feed.image.get('url', ''))) 3085 + except (ValidationError, AttributeError): 3086 + pass 3087 + 3088 + if hasattr(feed, 'icon'): 3089 + try: 3090 + icon = HttpUrl(feed.icon) 3091 + except ValidationError: 3092 + pass 3093 + 3094 + if hasattr(feed, 'logo'): 3095 + try: 3096 + logo = HttpUrl(feed.logo) 3097 + except ValidationError: 3098 + pass 3099 + 3100 + return FeedMetadata( 3101 + title=getattr(feed, 'title', None), 3102 + author_name=author_name, 3103 + author_email=author_email, 3104 + author_uri=HttpUrl(author_uri) if author_uri else None, 3105 + link=feed_link, 3106 + logo=logo, 3107 + icon=icon, 3108 + image_url=image_url, 3109 + description=getattr(feed, 'description', None), 3110 + ) 3111 + 3112 + def _normalize_entry(self, entry: feedparser.FeedParserDict, source_url: Optional[HttpUrl] = None) -> AtomEntry: 3113 + """Normalize an entry to Atom format.""" 3114 + # Parse timestamps 3115 + updated = self._parse_timestamp(entry.get('updated_parsed') or entry.get('published_parsed')) 3116 + published = self._parse_timestamp(entry.get('published_parsed')) 3117 + 3118 + # Parse content 3119 + content = self._extract_content(entry) 3120 + content_type = self._extract_content_type(entry) 3121 + 3122 + # Parse author 3123 + author = self._extract_author(entry) 3124 + 3125 + # Parse categories/tags 3126 + categories = [] 3127 + if hasattr(entry, 'tags'): 3128 + categories = [tag.get('term', '') for tag in entry.tags if tag.get('term')] 3129 + 3130 + # Sanitize HTML content 3131 + if content: 3132 + content = self._sanitize_html(content) 3133 + 3134 + summary = entry.get('summary', '') 3135 + if summary: 3136 + summary = self._sanitize_html(summary) 3137 + 3138 + return AtomEntry( 3139 + id=entry.get('id', entry.get('link', '')), 3140 + title=entry.get('title', ''), 3141 + 
link=HttpUrl(entry.get('link', '')), 3142 + updated=updated, 3143 + published=published, 3144 + summary=summary or None, 3145 + content=content or None, 3146 + content_type=content_type, 3147 + author=author, 3148 + categories=categories, 3149 + rights=entry.get('rights', None), 3150 + source=str(source_url) if source_url else None, 3151 + ) 3152 + 3153 + def _parse_timestamp(self, time_struct) -> datetime: 3154 + """Parse feedparser time struct to datetime.""" 3155 + if time_struct: 3156 + return datetime(*time_struct[:6]) 3157 + return datetime.now() 3158 + 3159 + def _extract_content(self, entry: feedparser.FeedParserDict) -> Optional[str]: 3160 + """Extract the best content from an entry.""" 3161 + # Prefer content over summary 3162 + if hasattr(entry, 'content') and entry.content: 3163 + # Find the best content (prefer text/html, then text/plain) 3164 + by_type = {item.get('type'): item.get('value', '') for item in entry.content} 3165 + # Check preferred types in order so HTML actually wins over plain text 3166 + for preferred in ('text/html', 'html', 'text/plain', 'text'): 3167 + if preferred in by_type: 3168 + return by_type[preferred] 3169 + # Fallback to first content item 3170 + return entry.content[0].get('value', '') 3171 + 3172 + # Fallback to summary 3173 + return entry.get('summary', '') 3174 + 3175 + def _extract_content_type(self, entry: feedparser.FeedParserDict) -> str: 3176 + """Extract content type from entry.""" 3177 + if hasattr(entry, 'content') and entry.content: 3178 + content_type = entry.content[0].get('type', 'html') 3179 + # Normalize content type 3180 + if content_type in ['text/html', 'html']: 3181 + return 'html' 3182 + elif content_type in ['text/plain', 'text']: 3183 + return 'text' 3184 + elif content_type == 'xhtml': 3185 + return 'xhtml' 3186 + return 'html' 3187 + 3188 + def _extract_author(self, entry: feedparser.FeedParserDict) -> Optional[dict]: 3189 + """Extract author information from entry.""" 3190 + author = {} 3191 + 3192 + if hasattr(entry, 'author_detail'):
3193 + author.update({ 3194 + 'name': entry.author_detail.get('name'), 3195 + 'email': entry.author_detail.get('email'), 3196 + 'uri': entry.author_detail.get('href'), 3197 + }) 3198 + elif hasattr(entry, 'author'): 3199 + author['name'] = entry.author 3200 + 3201 + return author if author else None 3202 + 3203 + def _sanitize_html(self, html: str) -> str: 3204 + """Sanitize HTML content to prevent XSS.""" 3205 + return bleach.clean( 3206 + html, 3207 + tags=self.allowed_tags, 3208 + attributes=self.allowed_attributes, 3209 + strip=True, 3210 + ) 3211 + 3212 + def sanitize_entry_id(self, entry_id: str) -> str: 3213 + """Sanitize entry ID to be a safe filename.""" 3214 + # Parse URL to get meaningful parts 3215 + parsed = urlparse(entry_id) 3216 + 3217 + # Start with the path component 3218 + if parsed.path: 3219 + # Remove leading slash and replace problematic characters 3220 + safe_id = parsed.path.lstrip('/').replace('/', '_').replace('\\', '_') 3221 + else: 3222 + # Use the entire ID as fallback 3223 + safe_id = entry_id 3224 + 3225 + # Replace problematic characters 3226 + safe_chars = [] 3227 + for char in safe_id: 3228 + if char.isalnum() or char in '-_.': 3229 + safe_chars.append(char) 3230 + else: 3231 + safe_chars.append('_') 3232 + 3233 + safe_id = ''.join(safe_chars) 3234 + 3235 + # Ensure it's not too long (max 200 chars) 3236 + if len(safe_id) > 200: 3237 + safe_id = safe_id[:200] 3238 + 3239 + # Ensure it's not empty 3240 + if not safe_id: 3241 + safe_id = "entry" 3242 + 3243 + return safe_id 3244 + </file> 3245 + 3246 + <file path="src/thicket/core/reference_parser.py"> 3247 + """Reference detection and parsing for blog entries.""" 3248 + 3249 + import re 3250 + from typing import Optional 3251 + from urllib.parse import urlparse 3252 + 3253 + from ..models import AtomEntry 3254 + 3255 + 3256 + class BlogReference: 3257 + """Represents a reference from one blog entry to another.""" 3258 + 3259 + def __init__( 3260 + self, 3261 + source_entry_id: str, 
3262 + source_username: str, 3263 + target_url: str, 3264 + target_username: Optional[str] = None, 3265 + target_entry_id: Optional[str] = None, 3266 + ): 3267 + self.source_entry_id = source_entry_id 3268 + self.source_username = source_username 3269 + self.target_url = target_url 3270 + self.target_username = target_username 3271 + self.target_entry_id = target_entry_id 3272 + 3273 + def to_dict(self) -> dict: 3274 + """Convert to dictionary for JSON serialization.""" 3275 + result = { 3276 + "source_entry_id": self.source_entry_id, 3277 + "source_username": self.source_username, 3278 + "target_url": self.target_url, 3279 + } 3280 + 3281 + # Only include optional fields if they are not None 3282 + if self.target_username is not None: 3283 + result["target_username"] = self.target_username 3284 + if self.target_entry_id is not None: 3285 + result["target_entry_id"] = self.target_entry_id 3286 + 3287 + return result 3288 + 3289 + @classmethod 3290 + def from_dict(cls, data: dict) -> "BlogReference": 3291 + """Create from dictionary.""" 3292 + return cls( 3293 + source_entry_id=data["source_entry_id"], 3294 + source_username=data["source_username"], 3295 + target_url=data["target_url"], 3296 + target_username=data.get("target_username"), 3297 + target_entry_id=data.get("target_entry_id"), 3298 + ) 3299 + 3300 + 3301 + class ReferenceIndex: 3302 + """Index of blog-to-blog references for creating threaded views.""" 3303 + 3304 + def __init__(self): 3305 + self.references: list[BlogReference] = [] 3306 + self.outbound_refs: dict[ 3307 + str, list[BlogReference] 3308 + ] = {} # entry_id -> outbound refs 3309 + self.inbound_refs: dict[ 3310 + str, list[BlogReference] 3311 + ] = {} # entry_id -> inbound refs 3312 + self.user_domains: dict[str, set[str]] = {} # username -> set of domains 3313 + 3314 + def add_reference(self, ref: BlogReference) -> None: 3315 + """Add a reference to the index.""" 3316 + self.references.append(ref) 3317 + 3318 + # Update outbound references 
3319 + source_key = f"{ref.source_username}:{ref.source_entry_id}" 3320 + if source_key not in self.outbound_refs: 3321 + self.outbound_refs[source_key] = [] 3322 + self.outbound_refs[source_key].append(ref) 3323 + 3324 + # Update inbound references if we can identify the target 3325 + if ref.target_username and ref.target_entry_id: 3326 + target_key = f"{ref.target_username}:{ref.target_entry_id}" 3327 + if target_key not in self.inbound_refs: 3328 + self.inbound_refs[target_key] = [] 3329 + self.inbound_refs[target_key].append(ref) 3330 + 3331 + def get_outbound_refs(self, username: str, entry_id: str) -> list[BlogReference]: 3332 + """Get all outbound references from an entry.""" 3333 + key = f"{username}:{entry_id}" 3334 + return self.outbound_refs.get(key, []) 3335 + 3336 + def get_inbound_refs(self, username: str, entry_id: str) -> list[BlogReference]: 3337 + """Get all inbound references to an entry.""" 3338 + key = f"{username}:{entry_id}" 3339 + return self.inbound_refs.get(key, []) 3340 + 3341 + def get_thread_members(self, username: str, entry_id: str) -> set[tuple[str, str]]: 3342 + """Get all entries that are part of the same thread.""" 3343 + visited = set() 3344 + to_visit = [(username, entry_id)] 3345 + thread_members = set() 3346 + 3347 + while to_visit: 3348 + current_user, current_entry = to_visit.pop() 3349 + if (current_user, current_entry) in visited: 3350 + continue 3351 + 3352 + visited.add((current_user, current_entry)) 3353 + thread_members.add((current_user, current_entry)) 3354 + 3355 + # Add outbound references 3356 + for ref in self.get_outbound_refs(current_user, current_entry): 3357 + if ref.target_username and ref.target_entry_id: 3358 + to_visit.append((ref.target_username, ref.target_entry_id)) 3359 + 3360 + # Add inbound references 3361 + for ref in self.get_inbound_refs(current_user, current_entry): 3362 + to_visit.append((ref.source_username, ref.source_entry_id)) 3363 + 3364 + return thread_members 3365 + 3366 + def 
to_dict(self) -> dict: 3367 + """Convert to dictionary for JSON serialization.""" 3368 + return { 3369 + "references": [ref.to_dict() for ref in self.references], 3370 + "user_domains": {k: list(v) for k, v in self.user_domains.items()}, 3371 + } 3372 + 3373 + @classmethod 3374 + def from_dict(cls, data: dict) -> "ReferenceIndex": 3375 + """Create from dictionary.""" 3376 + index = cls() 3377 + for ref_data in data.get("references", []): 3378 + ref = BlogReference.from_dict(ref_data) 3379 + index.add_reference(ref) 3380 + 3381 + for username, domains in data.get("user_domains", {}).items(): 3382 + index.user_domains[username] = set(domains) 3383 + 3384 + return index 3385 + 3386 + 3387 + class ReferenceParser: 3388 + """Parses blog entries to detect references to other blogs.""" 3389 + 3390 + def __init__(self): 3391 + # Common blog platforms and patterns 3392 + self.blog_patterns = [ 3393 + r"https?://[^/]+\.(?:org|com|net|io|dev|me|co\.uk)/.*", # Common blog domains 3394 + r"https?://[^/]+\.github\.io/.*", # GitHub Pages 3395 + r"https?://[^/]+\.substack\.com/.*", # Substack 3396 + r"https?://medium\.com/.*", # Medium 3397 + r"https?://[^/]+\.wordpress\.com/.*", # WordPress.com 3398 + r"https?://[^/]+\.blogspot\.com/.*", # Blogger 3399 + ] 3400 + 3401 + # Compile regex patterns 3402 + self.link_pattern = re.compile( 3403 + r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.IGNORECASE | re.DOTALL 3404 + ) 3405 + self.url_pattern = re.compile(r'https?://[^\s<>"]+') 3406 + 3407 + def extract_links_from_html(self, html_content: str) -> list[tuple[str, str]]: 3408 + """Extract all links from HTML content.""" 3409 + links = [] 3410 + 3411 + # Extract links from <a> tags 3412 + for match in self.link_pattern.finditer(html_content): 3413 + url = match.group(1) 3414 + text = re.sub( 3415 + r"<[^>]+>", "", match.group(2) 3416 + ).strip() # Remove HTML tags from link text 3417 + links.append((url, text)) 3418 + 3419 + return links 3420 + 3421 + def is_blog_url(self, url: str) -> 
bool: 3422 + """Check if a URL likely points to a blog post.""" 3423 + for pattern in self.blog_patterns: 3424 + if re.match(pattern, url): 3425 + return True 3426 + return False 3427 + 3428 + def _is_likely_blog_post_url(self, url: str) -> bool: 3429 + """Check if a same-domain URL likely points to a blog post (not CSS, images, etc.).""" 3430 + parsed_url = urlparse(url) 3431 + path = parsed_url.path.lower() 3432 + 3433 + # Skip obvious non-blog content 3434 + if any(path.endswith(ext) for ext in ['.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.pdf', '.xml', '.json']): 3435 + return False 3436 + 3437 + # Skip common non-blog paths 3438 + if any(segment in path for segment in ['/static/', '/assets/', '/css/', '/js/', '/images/', '/img/', '/media/', '/uploads/']): 3439 + return False 3440 + 3441 + # Skip fragment-only links (same page anchors) 3442 + if not path or path == '/': 3443 + return False 3444 + 3445 + # Look for positive indicators of blog posts 3446 + # Common blog post patterns: dates, slugs, post indicators 3447 + blog_indicators = [ 3448 + r'/\d{4}/', # Year in path 3449 + r'/\d{4}/\d{2}/', # Year/month in path 3450 + r'/blog/', 3451 + r'/post/', 3452 + r'/posts/', 3453 + r'/articles?/', 3454 + r'/notes?/', 3455 + r'/entries/', 3456 + r'/writing/', 3457 + ] 3458 + 3459 + for pattern in blog_indicators: 3460 + if re.search(pattern, path): 3461 + return True 3462 + 3463 + # If it has a reasonable path depth and doesn't match exclusions, likely a blog post 3464 + path_segments = [seg for seg in path.split('/') if seg] 3465 + return len(path_segments) >= 1 # At least one meaningful path segment 3466 + 3467 + def resolve_target_user( 3468 + self, url: str, user_domains: dict[str, set[str]] 3469 + ) -> Optional[str]: 3470 + """Try to resolve a URL to a known user based on domain mapping.""" 3471 + parsed_url = urlparse(url) 3472 + domain = parsed_url.netloc.lower() 3473 + 3474 + for username, domains in user_domains.items(): 3475 + if 
domain in domains: 3476 + return username 3477 + 3478 + return None 3479 + 3480 + def extract_references( 3481 + self, entry: AtomEntry, username: str, user_domains: dict[str, set[str]] 3482 + ) -> list[BlogReference]: 3483 + """Extract all blog references from an entry.""" 3484 + references = [] 3485 + 3486 + # Combine all text content for analysis 3487 + content_to_search = [] 3488 + if entry.content: 3489 + content_to_search.append(entry.content) 3490 + if entry.summary: 3491 + content_to_search.append(entry.summary) 3492 + 3493 + for content in content_to_search: 3494 + links = self.extract_links_from_html(content) 3495 + 3496 + for url, _link_text in links: 3497 + entry_domain = ( 3498 + urlparse(str(entry.link)).netloc.lower() if entry.link else "" 3499 + ) 3500 + link_domain = urlparse(url).netloc.lower() 3501 + 3502 + # Check if this looks like a blog URL 3503 + if not self.is_blog_url(url): 3504 + continue 3505 + 3506 + # For same-domain links, apply additional filtering to avoid non-blog content 3507 + if link_domain == entry_domain: 3508 + # Only include same-domain links that look like blog posts 3509 + if not self._is_likely_blog_post_url(url): 3510 + continue 3511 + 3512 + # Try to resolve to a known user 3513 + if link_domain == entry_domain: 3514 + # Same domain - target user is the same as source user 3515 + target_username: Optional[str] = username 3516 + else: 3517 + # Different domain - try to resolve 3518 + target_username = self.resolve_target_user(url, user_domains) 3519 + 3520 + ref = BlogReference( 3521 + source_entry_id=entry.id, 3522 + source_username=username, 3523 + target_url=url, 3524 + target_username=target_username, 3525 + target_entry_id=None, # Will be resolved later if possible 3526 + ) 3527 + 3528 + references.append(ref) 3529 + 3530 + return references 3531 + 3532 + def build_user_domain_mapping(self, git_store: "GitStore") -> dict[str, set[str]]: 3533 + """Build mapping of usernames to their known domains.""" 3534 + 
user_domains = {} 3535 + index = git_store._load_index() 3536 + 3537 + for username, user_metadata in index.users.items(): 3538 + domains = set() 3539 + 3540 + # Add domains from feeds 3541 + for feed_url in user_metadata.feeds: 3542 + domain = urlparse(feed_url).netloc.lower() 3543 + if domain: 3544 + domains.add(domain) 3545 + 3546 + # Add domain from homepage 3547 + if user_metadata.homepage: 3548 + domain = urlparse(str(user_metadata.homepage)).netloc.lower() 3549 + if domain: 3550 + domains.add(domain) 3551 + 3552 + user_domains[username] = domains 3553 + 3554 + return user_domains 3555 + 3556 + def _build_url_to_entry_mapping(self, git_store: "GitStore") -> dict[str, str]: 3557 + """Build a comprehensive mapping from URLs to entry IDs using git store data. 3558 + 3559 + This creates a bidirectional mapping that handles: 3560 + - Entry link URLs -> Entry IDs 3561 + - URL variations (with/without www, http/https) 3562 + - Multiple URLs pointing to the same entry 3563 + """ 3564 + url_to_entry: dict[str, str] = {} 3565 + 3566 + # Load index to get all users 3567 + index = git_store._load_index() 3568 + 3569 + for username in index.users.keys(): 3570 + entries = git_store.list_entries(username) 3571 + 3572 + for entry in entries: 3573 + if entry.link: 3574 + link_url = str(entry.link) 3575 + entry_id = entry.id 3576 + 3577 + # Map the canonical link URL 3578 + url_to_entry[link_url] = entry_id 3579 + 3580 + # Handle common URL variations 3581 + parsed = urlparse(link_url) 3582 + if parsed.netloc and parsed.path: 3583 + # Add version without www 3584 + if parsed.netloc.startswith('www.'): 3585 + no_www_url = f"{parsed.scheme}://{parsed.netloc[4:]}{parsed.path}" 3586 + if parsed.query: 3587 + no_www_url += f"?{parsed.query}" 3588 + if parsed.fragment: 3589 + no_www_url += f"#{parsed.fragment}" 3590 + url_to_entry[no_www_url] = entry_id 3591 + 3592 + # Add version with www if not present 3593 + elif not parsed.netloc.startswith('www.'): 3594 + www_url = 
f"{parsed.scheme}://www.{parsed.netloc}{parsed.path}" 3595 + if parsed.query: 3596 + www_url += f"?{parsed.query}" 3597 + if parsed.fragment: 3598 + www_url += f"#{parsed.fragment}" 3599 + url_to_entry[www_url] = entry_id 3600 + 3601 + # Add http/https variations 3602 + if parsed.scheme == 'https': 3603 + http_url = link_url.replace('https://', 'http://', 1) 3604 + url_to_entry[http_url] = entry_id 3605 + elif parsed.scheme == 'http': 3606 + https_url = link_url.replace('http://', 'https://', 1) 3607 + url_to_entry[https_url] = entry_id 3608 + 3609 + return url_to_entry 3610 + 3611 + def _normalize_url(self, url: str) -> str: 3612 + """Normalize URL for consistent matching. 3613 + 3614 + Handles common variations like trailing slashes, fragments, etc. 3615 + """ 3616 + parsed = urlparse(url) 3617 + 3618 + # Remove trailing slash from path 3619 + path = parsed.path.rstrip('/') if parsed.path != '/' else parsed.path 3620 + 3621 + # Reconstruct without fragment for consistent matching 3622 + normalized = f"{parsed.scheme}://{parsed.netloc}{path}" 3623 + if parsed.query: 3624 + normalized += f"?{parsed.query}" 3625 + 3626 + return normalized 3627 + 3628 + def resolve_target_entry_ids( 3629 + self, references: list[BlogReference], git_store: "GitStore" 3630 + ) -> list[BlogReference]: 3631 + """Resolve target_entry_id for references using comprehensive URL mapping.""" 3632 + resolved_refs = [] 3633 + 3634 + # Build comprehensive URL to entry ID mapping 3635 + url_to_entry = self._build_url_to_entry_mapping(git_store) 3636 + 3637 + for ref in references: 3638 + # If we already have a target_entry_id, keep the reference as-is 3639 + if ref.target_entry_id is not None: 3640 + resolved_refs.append(ref) 3641 + continue 3642 + 3643 + # If we don't have a target_username, we can't resolve it 3644 + if ref.target_username is None: 3645 + resolved_refs.append(ref) 3646 + continue 3647 + 3648 + # Try to resolve using URL mapping 3649 + resolved_entry_id = None 3650 + 3651 + # 
First, try exact match 3652 + if ref.target_url in url_to_entry: 3653 + resolved_entry_id = url_to_entry[ref.target_url] 3654 + else: 3655 + # Try normalized URL matching 3656 + normalized_target = self._normalize_url(ref.target_url) 3657 + if normalized_target in url_to_entry: 3658 + resolved_entry_id = url_to_entry[normalized_target] 3659 + else: 3660 + # Try URL variations 3661 + for mapped_url, entry_id in url_to_entry.items(): 3662 + if self._normalize_url(mapped_url) == normalized_target: 3663 + resolved_entry_id = entry_id 3664 + break 3665 + 3666 + # Verify the resolved entry belongs to the target username 3667 + if resolved_entry_id: 3668 + # Double-check by loading the actual entry 3669 + entries = git_store.list_entries(ref.target_username) 3670 + entry_found = any(entry.id == resolved_entry_id for entry in entries) 3671 + if not entry_found: 3672 + resolved_entry_id = None 3673 + 3674 + # Create a new reference with the resolved target_entry_id 3675 + resolved_ref = BlogReference( 3676 + source_entry_id=ref.source_entry_id, 3677 + source_username=ref.source_username, 3678 + target_url=ref.target_url, 3679 + target_username=ref.target_username, 3680 + target_entry_id=resolved_entry_id, 3681 + ) 3682 + resolved_refs.append(resolved_ref) 3683 + 3684 + return resolved_refs 3685 + </file> 3686 + 3687 + <file path="src/thicket/models/__init__.py"> 3688 + """Data models for thicket.""" 3689 + 3690 + from .config import ThicketConfig, UserConfig 3691 + from .feed import AtomEntry, DuplicateMap, FeedMetadata 3692 + from .user import GitStoreIndex, UserMetadata 3693 + 3694 + __all__ = [ 3695 + "ThicketConfig", 3696 + "UserConfig", 3697 + "AtomEntry", 3698 + "DuplicateMap", 3699 + "FeedMetadata", 3700 + "GitStoreIndex", 3701 + "UserMetadata", 3702 + ] 3703 + </file> 3704 + 3705 + <file path="src/thicket/models/feed.py"> 3706 + """Feed and entry models for thicket.""" 3707 + 3708 + from datetime import datetime 3709 + from typing import TYPE_CHECKING, Optional 3710 
+ 3711 + from pydantic import BaseModel, ConfigDict, EmailStr, HttpUrl 3712 + 3713 + if TYPE_CHECKING: 3714 + from .config import UserConfig 3715 + 3716 + 3717 + class AtomEntry(BaseModel): 3718 + """Represents an Atom feed entry stored in the Git repository.""" 3719 + 3720 + model_config = ConfigDict( 3721 + json_encoders={datetime: lambda v: v.isoformat()}, 3722 + str_strip_whitespace=True, 3723 + ) 3724 + 3725 + id: str # Original Atom ID 3726 + title: str 3727 + link: HttpUrl 3728 + updated: datetime 3729 + published: Optional[datetime] = None 3730 + summary: Optional[str] = None 3731 + content: Optional[str] = None # Full body content from Atom entry 3732 + content_type: Optional[str] = "html" # text, html, xhtml 3733 + author: Optional[dict] = None 3734 + categories: list[str] = [] 3735 + rights: Optional[str] = None # Copyright info 3736 + source: Optional[str] = None # Source feed URL 3737 + 3738 + 3739 + class FeedMetadata(BaseModel): 3740 + """Metadata extracted from a feed for auto-discovery.""" 3741 + 3742 + title: Optional[str] = None 3743 + author_name: Optional[str] = None 3744 + author_email: Optional[EmailStr] = None 3745 + author_uri: Optional[HttpUrl] = None 3746 + link: Optional[HttpUrl] = None 3747 + logo: Optional[HttpUrl] = None 3748 + icon: Optional[HttpUrl] = None 3749 + image_url: Optional[HttpUrl] = None 3750 + description: Optional[str] = None 3751 + 3752 + def to_user_config(self, username: str, feed_url: HttpUrl) -> "UserConfig": 3753 + """Convert discovered metadata to UserConfig with fallbacks.""" 3754 + from .config import UserConfig 3755 + 3756 + return UserConfig( 3757 + username=username, 3758 + feeds=[feed_url], 3759 + display_name=self.author_name or self.title, 3760 + email=self.author_email, 3761 + homepage=self.author_uri or self.link, 3762 + icon=self.logo or self.icon or self.image_url, 3763 + ) 3764 + 3765 + 3766 + class DuplicateMap(BaseModel): 3767 + """Maps duplicate entry IDs to canonical entry IDs.""" 3768 + 3769 + 
duplicates: dict[str, str] = {} # duplicate_id -> canonical_id 3770 + comment: str = "Entry IDs that map to the same canonical content" 3771 + 3772 + def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None: 3773 + """Add a duplicate mapping.""" 3774 + self.duplicates[duplicate_id] = canonical_id 3775 + 3776 + def remove_duplicate(self, duplicate_id: str) -> bool: 3777 + """Remove a duplicate mapping. Returns True if existed.""" 3778 + return self.duplicates.pop(duplicate_id, None) is not None 3779 + 3780 + def get_canonical(self, entry_id: str) -> str: 3781 + """Get canonical ID for an entry (returns original if not duplicate).""" 3782 + return self.duplicates.get(entry_id, entry_id) 3783 + 3784 + def is_duplicate(self, entry_id: str) -> bool: 3785 + """Check if entry ID is marked as duplicate.""" 3786 + return entry_id in self.duplicates 3787 + 3788 + def get_duplicates_for_canonical(self, canonical_id: str) -> list[str]: 3789 + """Get all duplicate IDs that map to a canonical ID.""" 3790 + return [ 3791 + duplicate_id 3792 + for duplicate_id, canonical in self.duplicates.items() 3793 + if canonical == canonical_id 3794 + ] 3795 + </file> 3796 + 3797 + <file path="src/thicket/models/user.py"> 3798 + """User metadata models for thicket.""" 3799 + 3800 + from datetime import datetime 3801 + from typing import Optional 3802 + 3803 + from pydantic import BaseModel, ConfigDict 3804 + 3805 + 3806 + class UserMetadata(BaseModel): 3807 + """Metadata about a user stored in the Git repository.""" 3808 + 3809 + model_config = ConfigDict( 3810 + json_encoders={datetime: lambda v: v.isoformat()}, 3811 + str_strip_whitespace=True, 3812 + ) 3813 + 3814 + username: str 3815 + display_name: Optional[str] = None 3816 + email: Optional[str] = None 3817 + homepage: Optional[str] = None 3818 + icon: Optional[str] = None 3819 + feeds: list[str] = [] 3820 + directory: str # Directory name in Git store 3821 + created: datetime 3822 + last_updated: datetime 3823 + 
entry_count: int = 0 3824 + 3825 + def update_timestamp(self) -> None: 3826 + """Update the last_updated timestamp to now.""" 3827 + self.last_updated = datetime.now() 3828 + 3829 + def increment_entry_count(self, count: int = 1) -> None: 3830 + """Increment the entry count by the given amount.""" 3831 + self.entry_count += count 3832 + self.update_timestamp() 3833 + 3834 + 3835 + class GitStoreIndex(BaseModel): 3836 + """Index of all users and their directories in the Git store.""" 3837 + 3838 + model_config = ConfigDict( 3839 + json_encoders={datetime: lambda v: v.isoformat()} 3840 + ) 3841 + 3842 + users: dict[str, UserMetadata] = {} # username -> UserMetadata 3843 + created: datetime 3844 + last_updated: datetime 3845 + total_entries: int = 0 3846 + 3847 + def add_user(self, user_metadata: UserMetadata) -> None: 3848 + """Add or update a user in the index.""" 3849 + self.users[user_metadata.username] = user_metadata 3850 + self.last_updated = datetime.now() 3851 + 3852 + def remove_user(self, username: str) -> bool: 3853 + """Remove a user from the index. 
Returns True if user existed.""" 3854 + if username in self.users: 3855 + del self.users[username] 3856 + self.last_updated = datetime.now() 3857 + return True 3858 + return False 3859 + 3860 + def get_user(self, username: str) -> Optional[UserMetadata]: 3861 + """Get user metadata by username.""" 3862 + return self.users.get(username) 3863 + 3864 + def update_entry_count(self, username: str, count: int) -> None: 3865 + """Update entry count for a user and total.""" 3866 + user = self.get_user(username) 3867 + if user: 3868 + user.increment_entry_count(count) 3869 + self.total_entries += count 3870 + self.last_updated = datetime.now() 3871 + 3872 + def recalculate_totals(self) -> None: 3873 + """Recalculate total entries from all users.""" 3874 + self.total_entries = sum(user.entry_count for user in self.users.values()) 3875 + self.last_updated = datetime.now() 3876 + </file> 3877 + 3878 + <file path="src/thicket/utils/__init__.py"> 3879 + """Utility modules for thicket.""" 3880 + 3881 + # This module will contain shared utilities 3882 + # For now, it's empty but can be expanded with common functions 3883 + </file> 3884 + 3885 + <file path="src/thicket/__init__.py"> 3886 + """Thicket: A CLI tool for persisting Atom/RSS feeds in Git repositories.""" 3887 + 3888 + __version__ = "0.1.0" 3889 + __author__ = "thicket" 3890 + __email__ = "thicket@example.com" 3891 + </file> 3892 + 3893 + <file path="src/thicket/__main__.py"> 3894 + """Entry point for running thicket as a module.""" 3895 + 3896 + from .cli.main import app 3897 + 3898 + if __name__ == "__main__": 3899 + app() 3900 + </file> 3901 + 3902 + <file path=".gitignore"> 3903 + # Byte-compiled / optimized / DLL files 3904 + __pycache__/ 3905 + *.py[codz] 3906 + *$py.class 3907 + 3908 + # C extensions 3909 + *.so 3910 + 3911 + # Distribution / packaging 3912 + .Python 3913 + build/ 3914 + develop-eggs/ 3915 + dist/ 3916 + downloads/ 3917 + eggs/ 3918 + .eggs/ 3919 + lib/ 3920 + lib64/ 3921 + parts/ 3922 + sdist/ 
3923 + var/ 3924 + wheels/ 3925 + share/python-wheels/ 3926 + *.egg-info/ 3927 + .installed.cfg 3928 + *.egg 3929 + MANIFEST 3930 + 3931 + # PyInstaller 3932 + # Usually these files are written by a python script from a template 3933 + # before PyInstaller builds the exe, so as to inject date/other infos into it. 3934 + *.manifest 3935 + *.spec 3936 + 3937 + # Installer logs 3938 + pip-log.txt 3939 + pip-delete-this-directory.txt 3940 + 3941 + # Unit test / coverage reports 3942 + htmlcov/ 3943 + .tox/ 3944 + .nox/ 3945 + .coverage 3946 + .coverage.* 3947 + .cache 3948 + nosetests.xml 3949 + coverage.xml 3950 + *.cover 3951 + *.py.cover 3952 + .hypothesis/ 3953 + .pytest_cache/ 3954 + cover/ 3955 + 3956 + # Translations 3957 + *.mo 3958 + *.pot 3959 + 3960 + # Django stuff: 3961 + *.log 3962 + local_settings.py 3963 + db.sqlite3 3964 + db.sqlite3-journal 3965 + 3966 + # Flask stuff: 3967 + instance/ 3968 + .webassets-cache 3969 + 3970 + # Scrapy stuff: 3971 + .scrapy 3972 + 3973 + # Sphinx documentation 3974 + docs/_build/ 3975 + 3976 + # PyBuilder 3977 + .pybuilder/ 3978 + target/ 3979 + 3980 + # Jupyter Notebook 3981 + .ipynb_checkpoints 3982 + 3983 + # IPython 3984 + profile_default/ 3985 + ipython_config.py 3986 + 3987 + # pyenv 3988 + # For a library or package, you might want to ignore these files since the code is 3989 + # intended to run in multiple environments; otherwise, check them in: 3990 + # .python-version 3991 + 3992 + # pipenv 3993 + # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 3994 + # However, in case of collaboration, if having platform-specific dependencies or dependencies 3995 + # having no cross-platform support, pipenv may install dependencies that don't work, or not 3996 + # install all needed dependencies. 3997 + #Pipfile.lock 3998 + 3999 + # UV 4000 + # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 
4001 + # This is especially recommended for binary packages to ensure reproducibility, and is more 4002 + # commonly ignored for libraries. 4003 + #uv.lock 4004 + 4005 + # poetry 4006 + # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 4007 + # This is especially recommended for binary packages to ensure reproducibility, and is more 4008 + # commonly ignored for libraries. 4009 + # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 4010 + #poetry.lock 4011 + #poetry.toml 4012 + 4013 + # pdm 4014 + # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 4015 + # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. 4016 + # https://pdm-project.org/en/latest/usage/project/#working-with-version-control 4017 + #pdm.lock 4018 + #pdm.toml 4019 + .pdm-python 4020 + .pdm-build/ 4021 + 4022 + # pixi 4023 + # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. 4024 + #pixi.lock 4025 + # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one 4026 + # in the .venv directory. It is recommended not to include this directory in version control. 4027 + .pixi 4028 + 4029 + # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 4030 + __pypackages__/ 4031 + 4032 + # Celery stuff 4033 + celerybeat-schedule 4034 + celerybeat.pid 4035 + 4036 + # SageMath parsed files 4037 + *.sage.py 4038 + 4039 + # Environments 4040 + .env 4041 + .envrc 4042 + .venv 4043 + env/ 4044 + venv/ 4045 + ENV/ 4046 + env.bak/ 4047 + venv.bak/ 4048 + 4049 + # Spyder project settings 4050 + .spyderproject 4051 + .spyproject 4052 + 4053 + # Rope project settings 4054 + .ropeproject 4055 + 4056 + # mkdocs documentation 4057 + /site 4058 + 4059 + # mypy 4060 + .mypy_cache/ 4061 + .dmypy.json 4062 + dmypy.json 4063 + 4064 + # Pyre type checker 4065 + .pyre/ 4066 + 4067 + # pytype static type analyzer 4068 + .pytype/ 4069 + 4070 + # Cython debug symbols 4071 + cython_debug/ 4072 + 4073 + # PyCharm 4074 + # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 4075 + # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 4076 + # and can be added to the global gitignore or merged into this file. For a more nuclear 4077 + # option (not recommended) you can uncomment the following to ignore the entire idea folder. 4078 + #.idea/ 4079 + 4080 + # Abstra 4081 + # Abstra is an AI-powered process automation framework. 4082 + # Ignore directories containing user credentials, local state, and settings. 4083 + # Learn more at https://abstra.io/docs 4084 + .abstra/ 4085 + 4086 + # Visual Studio Code 4087 + # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 4088 + # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore 4089 + # and can be added to the global gitignore or merged into this file. 
However, if you prefer, 4090 + # you could uncomment the following to ignore the entire vscode folder 4091 + # .vscode/ 4092 + 4093 + # Ruff stuff: 4094 + .ruff_cache/ 4095 + 4096 + # PyPI configuration file 4097 + .pypirc 4098 + 4099 + # Marimo 4100 + marimo/_static/ 4101 + marimo/_lsp/ 4102 + __marimo__/ 4103 + 4104 + # Streamlit 4105 + .streamlit/secrets.toml 4106 + 4107 + thicket.yaml 4108 + </file> 4109 + 4110 + <file path="CLAUDE.md"> 4111 + My goal is to build a CLI tool called thicket in Python that maintains a Git repository within which Atom feeds can be persisted, including their contents. 4112 + 4113 + # Python Environment and Package Management 4114 + 4115 + This project uses `uv` for Python package management and virtual environment handling. 4116 + 4117 + ## Running Commands 4118 + 4119 + ALWAYS use `uv run` to execute Python commands: 4120 + 4121 + - Run the CLI: `uv run -m thicket` 4122 + - Run tests: `uv run pytest` 4123 + - Type checking: `uv run mypy src/` 4124 + - Linting: `uv run ruff check src/` 4125 + - Format code: `uv run ruff format src/` 4126 + - Compile check: `uv run python -m py_compile <file>` 4127 + 4128 + ## Package Management 4129 + 4130 + - Add dependencies: `uv add <package>` 4131 + - Add dev dependencies: `uv add --dev <package>` 4132 + - Install dependencies: `uv sync` 4133 + - Update dependencies: `uv lock --upgrade` 4134 + 4135 + # Project Structure 4136 + 4137 + The configuration file specifies: 4138 + - the location of a git store 4139 + - a list of usernames and target Atom/RSS feed(s) and optional metadata about the username such as their email, homepage, icon and display name 4140 + - a cache directory to store temporary results such as feed downloads and their last modification date that speed up operations across runs of the tool 4141 + 4142 + The Git data store should: 4143 + - have a subdirectory per user 4144 + - within that directory, an entry per Atom entry indexed by the Atom id for that entry. 
The id should be sanitised consistently to be a safe filename. RSS feeds should be normalized to Atom before storing them. 4145 + - within each entry file, the metadata of the Atom feed converted into a JSON format that preserves as much metadata as possible. 4146 + - have a JSON file in the Git repository that indexes the users, their associated directories within the Git repository, and any other metadata about that user from the config file 4147 + The CLI should be modern and use cool progress bars and any other niceties from ecosystem libraries. 4148 + 4149 + The intention behind the Git repository is that it can be queried by other websites in order to build a weblog structure of comments that link to other blogs. 4150 + </file> 4151 + 4152 + <file path="pyproject.toml"> 4153 + [build-system] 4154 + requires = ["hatchling"] 4155 + build-backend = "hatchling.build" 4156 + 4157 + [project] 4158 + name = "thicket" 4159 + dynamic = ["version"] 4160 + description = "A CLI tool for persisting Atom/RSS feeds in Git repositories" 4161 + readme = "README.md" 4162 + license = "MIT" 4163 + requires-python = ">=3.9" 4164 + authors = [ 4165 + {name = "thicket", email = "thicket@example.com"}, 4166 + ] 4167 + classifiers = [ 4168 + "Development Status :: 3 - Alpha", 4169 + "Intended Audience :: Developers", 4170 + "License :: OSI Approved :: MIT License", 4171 + "Operating System :: OS Independent", 4172 + "Programming Language :: Python :: 3", 4173 + "Programming Language :: Python :: 3.9", 4174 + "Programming Language :: Python :: 3.10", 4175 + "Programming Language :: Python :: 3.11", 4176 + "Programming Language :: Python :: 3.12", 4177 + "Programming Language :: Python :: 3.13", 4178 + "Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary", 4179 + "Topic :: Software Development :: Version Control :: Git", 4180 + "Topic :: Text Processing :: Markup :: XML", 4181 + ] 4182 + dependencies = [ 4183 + "typer>=0.15.0", 4184 + "rich>=13.0.0", 4185 + "GitPython>=3.1.40", 4186 + 
"feedparser>=6.0.11", 4187 + "pydantic>=2.11.0", 4188 + "pydantic-settings>=2.10.0", 4189 + "httpx>=0.28.0", 4190 + "pendulum>=3.0.0", 4191 + "bleach>=6.0.0", 4192 + "platformdirs>=4.0.0", 4193 + "pyyaml>=6.0.0", 4194 + "email_validator", 4195 + "jinja2>=3.1.6", 4196 + ] 4197 + 4198 + [project.optional-dependencies] 4199 + dev = [ 4200 + "pytest>=8.0.0", 4201 + "pytest-asyncio>=0.24.0", 4202 + "pytest-cov>=6.0.0", 4203 + "black>=24.0.0", 4204 + "ruff>=0.8.0", 4205 + "mypy>=1.13.0", 4206 + "types-PyYAML>=6.0.0", 4207 + ] 4208 + 4209 + [project.urls] 4210 + Homepage = "https://github.com/example/thicket" 4211 + Documentation = "https://github.com/example/thicket" 4212 + Repository = "https://github.com/example/thicket" 4213 + "Bug Tracker" = "https://github.com/example/thicket/issues" 4214 + 4215 + [project.scripts] 4216 + thicket = "thicket.cli.main:app" 4217 + 4218 + [tool.hatch.version] 4219 + path = "src/thicket/__init__.py" 4220 + 4221 + [tool.hatch.build.targets.wheel] 4222 + packages = ["src/thicket"] 4223 + 4224 + [tool.black] 4225 + line-length = 88 4226 + target-version = ['py39'] 4227 + include = '\.pyi?$' 4228 + extend-exclude = ''' 4229 + /( 4230 + # directories 4231 + \.eggs 4232 + | \.git 4233 + | \.hg 4234 + | \.mypy_cache 4235 + | \.tox 4236 + | \.venv 4237 + | build 4238 + | dist 4239 + )/ 4240 + ''' 4241 + 4242 + [tool.ruff] 4243 + target-version = "py39" 4244 + line-length = 88 4245 + 4246 + [tool.ruff.lint] 4247 + select = [ 4248 + "E", # pycodestyle errors 4249 + "W", # pycodestyle warnings 4250 + "F", # pyflakes 4251 + "I", # isort 4252 + "B", # flake8-bugbear 4253 + "C4", # flake8-comprehensions 4254 + "UP", # pyupgrade 4255 + ] 4256 + ignore = [ 4257 + "E501", # line too long, handled by black 4258 + "B008", # do not perform function calls in argument defaults 4259 + "C901", # too complex 4260 + ] 4261 + 4262 + [tool.ruff.lint.per-file-ignores] 4263 + "__init__.py" = ["F401"] 4264 + 4265 + [tool.mypy] 4266 + python_version = "3.9" 4267 + 
check_untyped_defs = true 4268 + disallow_any_generics = true 4269 + disallow_incomplete_defs = true 4270 + disallow_untyped_defs = true 4271 + no_implicit_optional = true 4272 + warn_redundant_casts = true 4273 + warn_unused_ignores = true 4274 + warn_return_any = true 4275 + strict_optional = true 4276 + 4277 + [[tool.mypy.overrides]] 4278 + module = [ 4279 + "feedparser", 4280 + "git", 4281 + "bleach", 4282 + ] 4283 + ignore_missing_imports = true 4284 + 4285 + [tool.pytest.ini_options] 4286 + testpaths = ["tests"] 4287 + python_files = ["test_*.py"] 4288 + python_classes = ["Test*"] 4289 + python_functions = ["test_*"] 4290 + addopts = [ 4291 + "-ra", 4292 + "--strict-markers", 4293 + "--strict-config", 4294 + "--cov=src/thicket", 4295 + "--cov-report=term-missing", 4296 + "--cov-report=html", 4297 + "--cov-report=xml", 4298 + ] 4299 + filterwarnings = [ 4300 + "error", 4301 + "ignore::UserWarning", 4302 + "ignore::DeprecationWarning", 4303 + ] 4304 + markers = [ 4305 + "slow: marks tests as slow (deselect with '-m \"not slow\"')", 4306 + "integration: marks tests as integration tests", 4307 + ] 4308 + 4309 + [tool.coverage.run] 4310 + source = ["src"] 4311 + branch = true 4312 + 4313 + [tool.coverage.report] 4314 + exclude_lines = [ 4315 + "pragma: no cover", 4316 + "def __repr__", 4317 + "if self.debug:", 4318 + "if settings.DEBUG", 4319 + "raise AssertionError", 4320 + "raise NotImplementedError", 4321 + "if 0:", 4322 + "if __name__ == .__main__.:", 4323 + "class .*\\bProtocol\\):", 4324 + "@(abc\\.)?abstractmethod", 4325 + ] 4326 + </file> 4327 + 4328 + <file path="src/thicket/cli/commands/__init__.py"> 4329 + """CLI commands for thicket.""" 4330 + 4331 + # Import all commands to register them with the main app 4332 + from . 
import add, duplicates, generate, index_cmd, info_cmd, init, links_cmd, list_cmd, sync 4333 + 4334 + __all__ = ["add", "duplicates", "generate", "index_cmd", "info_cmd", "init", "links_cmd", "list_cmd", "sync"] 4335 + </file> 4336 + 4337 + <file path="src/thicket/cli/commands/add.py"> 4338 + """Add command for thicket.""" 4339 + 4340 + import asyncio 4341 + from pathlib import Path 4342 + from typing import Optional 4343 + 4344 + import typer 4345 + from pydantic import HttpUrl, ValidationError 4346 + 4347 + from ...core.feed_parser import FeedParser 4348 + from ...core.git_store import GitStore 4349 + from ..main import app 4350 + from ..utils import ( 4351 + create_progress, 4352 + load_config, 4353 + print_error, 4354 + print_info, 4355 + print_success, 4356 + ) 4357 + 4358 + 4359 + @app.command("add") 4360 + def add_command( 4361 + subcommand: str = typer.Argument(..., help="Subcommand: 'user' or 'feed'"), 4362 + username: str = typer.Argument(..., help="Username"), 4363 + feed_url: Optional[str] = typer.Argument(None, help="Feed URL (required for 'user' command)"), 4364 + email: Optional[str] = typer.Option(None, "--email", "-e", help="User email"), 4365 + homepage: Optional[str] = typer.Option(None, "--homepage", "-h", help="User homepage"), 4366 + icon: Optional[str] = typer.Option(None, "--icon", "-i", help="User icon URL"), 4367 + display_name: Optional[str] = typer.Option(None, "--display-name", "-d", help="User display name"), 4368 + config_file: Optional[Path] = typer.Option( 4369 + Path("thicket.yaml"), "--config", help="Configuration file path" 4370 + ), 4371 + auto_discover: bool = typer.Option( 4372 + True, "--auto-discover/--no-auto-discover", help="Auto-discover user metadata from feed" 4373 + ), 4374 + ) -> None: 4375 + """Add a user or feed to thicket.""" 4376 + 4377 + if subcommand == "user": 4378 + add_user(username, feed_url, email, homepage, icon, display_name, config_file, auto_discover) 4379 + elif subcommand == "feed": 4380 + 
add_feed(username, feed_url, config_file) 4381 + else: 4382 + print_error(f"Unknown subcommand: {subcommand}") 4383 + print_error("Use 'user' or 'feed'") 4384 + raise typer.Exit(1) 4385 + 4386 + def _opt_str(value) -> Optional[str]: + """Stringify a value but preserve None, so missing metadata is not stored as the literal string "None".""" + return str(value) if value is not None else None + 4387 + def add_user( 4388 + username: str, 4389 + feed_url: Optional[str], 4390 + email: Optional[str], 4391 + homepage: Optional[str], 4392 + icon: Optional[str], 4393 + display_name: Optional[str], 4394 + config_file: Path, 4395 + auto_discover: bool, 4396 + ) -> None: 4397 + """Add a new user with feed.""" 4398 + 4399 + if not feed_url: 4400 + print_error("Feed URL is required when adding a user") 4401 + raise typer.Exit(1) 4402 + 4403 + # Validate feed URL 4404 + try: 4405 + validated_feed_url = HttpUrl(feed_url) 4406 + except ValidationError: 4407 + print_error(f"Invalid feed URL: {feed_url}") 4408 + raise typer.Exit(1) from None 4409 + 4410 + # Load configuration 4411 + config = load_config(config_file) 4412 + 4413 + # Initialize Git store 4414 + git_store = GitStore(config.git_store) 4415 + 4416 + # Check if user already exists 4417 + existing_user = git_store.get_user(username) 4418 + if existing_user: 4419 + print_error(f"User '{username}' already exists") 4420 + print_error("Use 'thicket add feed' to add additional feeds") 4421 + raise typer.Exit(1) 4422 + 4423 + # Auto-discover metadata if enabled 4424 + discovered_metadata = None 4425 + if auto_discover: 4426 + discovered_metadata = asyncio.run(discover_feed_metadata(validated_feed_url)) 4427 + 4428 + # Prepare user data with manual overrides taking precedence 4429 + user_display_name = display_name or (discovered_metadata.author_name or discovered_metadata.title if discovered_metadata else None) 4430 + user_email = email or (discovered_metadata.author_email if discovered_metadata else None) 4431 + user_homepage = homepage or (_opt_str(discovered_metadata.author_uri or discovered_metadata.link) if discovered_metadata else None) 4432 + user_icon = icon or (_opt_str(discovered_metadata.logo or discovered_metadata.icon or
discovered_metadata.image_url) if discovered_metadata else None) 4433 + 4434 + # Add user to Git store 4435 + git_store.add_user( 4436 + username=username, 4437 + display_name=user_display_name, 4438 + email=user_email, 4439 + homepage=user_homepage, 4440 + icon=user_icon, 4441 + feeds=[str(validated_feed_url)], 4442 + ) 4443 + 4444 + # Commit changes 4445 + git_store.commit_changes(f"Add user: {username}") 4446 + 4447 + print_success(f"Added user '{username}' with feed: {feed_url}") 4448 + 4449 + if discovered_metadata and auto_discover: 4450 + print_info("Auto-discovered metadata:") 4451 + if user_display_name: 4452 + print_info(f" Display name: {user_display_name}") 4453 + if user_email: 4454 + print_info(f" Email: {user_email}") 4455 + if user_homepage: 4456 + print_info(f" Homepage: {user_homepage}") 4457 + if user_icon: 4458 + print_info(f" Icon: {user_icon}") 4459 + 4460 + 4461 + def add_feed(username: str, feed_url: Optional[str], config_file: Path) -> None: 4462 + """Add a feed to an existing user.""" 4463 + 4464 + if not feed_url: 4465 + print_error("Feed URL is required") 4466 + raise typer.Exit(1) 4467 + 4468 + # Validate feed URL 4469 + try: 4470 + validated_feed_url = HttpUrl(feed_url) 4471 + except ValidationError: 4472 + print_error(f"Invalid feed URL: {feed_url}") 4473 + raise typer.Exit(1) from None 4474 + 4475 + # Load configuration 4476 + config = load_config(config_file) 4477 + 4478 + # Initialize Git store 4479 + git_store = GitStore(config.git_store) 4480 + 4481 + # Check if user exists 4482 + user = git_store.get_user(username) 4483 + if not user: 4484 + print_error(f"User '{username}' not found") 4485 + print_error("Use 'thicket add user' to add a new user") 4486 + raise typer.Exit(1) 4487 + 4488 + # Check if feed already exists 4489 + if str(validated_feed_url) in user.feeds: 4490 + print_error(f"Feed already exists for user '{username}': {feed_url}") 4491 + raise typer.Exit(1) 4492 + 4493 + # Add feed to user 4494 + updated_feeds = 
user.feeds + [str(validated_feed_url)] 4495 + if git_store.update_user(username, feeds=updated_feeds): 4496 + git_store.commit_changes(f"Add feed to user {username}: {feed_url}") 4497 + print_success(f"Added feed to user '{username}': {feed_url}") 4498 + else: 4499 + print_error(f"Failed to add feed to user '{username}'") 4500 + raise typer.Exit(1) 4501 + 4502 + 4503 + async def discover_feed_metadata(feed_url: HttpUrl): 4504 + """Discover metadata from a feed URL.""" 4505 + try: 4506 + with create_progress() as progress: 4507 + task = progress.add_task("Discovering feed metadata...", total=None) 4508 + 4509 + parser = FeedParser() 4510 + content = await parser.fetch_feed(feed_url) 4511 + metadata, _ = parser.parse_feed(content, feed_url) 4512 + 4513 + progress.update(task, completed=True) 4514 + return metadata 4515 + 4516 + except Exception as e: 4517 + print_error(f"Failed to discover feed metadata: {e}") 4518 + return None 4519 + </file> 4520 + 4521 + <file path="src/thicket/cli/commands/duplicates.py"> 4522 + """Duplicates command for thicket.""" 4523 + 4524 + from pathlib import Path 4525 + from typing import Optional 4526 + 4527 + import typer 4528 + from rich.table import Table 4529 + 4530 + from ...core.git_store import GitStore 4531 + from ..main import app 4532 + from ..utils import ( 4533 + console, 4534 + load_config, 4535 + print_error, 4536 + print_info, 4537 + print_success, 4538 + get_tsv_mode, 4539 + ) 4540 + 4541 + 4542 + @app.command("duplicates") 4543 + def duplicates_command( 4544 + action: str = typer.Argument(..., help="Action: 'list', 'add', 'remove'"), 4545 + duplicate_id: Optional[str] = typer.Argument(None, help="Duplicate entry ID"), 4546 + canonical_id: Optional[str] = typer.Argument(None, help="Canonical entry ID"), 4547 + config_file: Optional[Path] = typer.Option( 4548 + Path("thicket.yaml"), "--config", help="Configuration file path" 4549 + ), 4550 + ) -> None: 4551 + """Manage duplicate entry mappings.""" 4552 + 4553 + # Load 
configuration 4554 + config = load_config(config_file) 4555 + 4556 + # Initialize Git store 4557 + git_store = GitStore(config.git_store) 4558 + 4559 + if action == "list": 4560 + list_duplicates(git_store) 4561 + elif action == "add": 4562 + add_duplicate(git_store, duplicate_id, canonical_id) 4563 + elif action == "remove": 4564 + remove_duplicate(git_store, duplicate_id) 4565 + else: 4566 + print_error(f"Unknown action: {action}") 4567 + print_error("Use 'list', 'add', or 'remove'") 4568 + raise typer.Exit(1) 4569 + 4570 + 4571 + def list_duplicates(git_store: GitStore) -> None: 4572 + """List all duplicate mappings.""" 4573 + duplicates = git_store.get_duplicates() 4574 + 4575 + if not duplicates.duplicates: 4576 + if get_tsv_mode(): 4577 + print("No duplicate mappings found") 4578 + else: 4579 + print_info("No duplicate mappings found") 4580 + return 4581 + 4582 + if get_tsv_mode(): 4583 + print("Duplicate ID\tCanonical ID") 4584 + for duplicate_id, canonical_id in duplicates.duplicates.items(): 4585 + print(f"{duplicate_id}\t{canonical_id}") 4586 + print(f"Total duplicates: {len(duplicates.duplicates)}") 4587 + else: 4588 + table = Table(title="Duplicate Entry Mappings") 4589 + table.add_column("Duplicate ID", style="red") 4590 + table.add_column("Canonical ID", style="green") 4591 + 4592 + for duplicate_id, canonical_id in duplicates.duplicates.items(): 4593 + table.add_row(duplicate_id, canonical_id) 4594 + 4595 + console.print(table) 4596 + print_info(f"Total duplicates: {len(duplicates.duplicates)}") 4597 + 4598 + 4599 + def add_duplicate(git_store: GitStore, duplicate_id: Optional[str], canonical_id: Optional[str]) -> None: 4600 + """Add a duplicate mapping.""" 4601 + if not duplicate_id: 4602 + print_error("Duplicate ID is required") 4603 + raise typer.Exit(1) 4604 + 4605 + if not canonical_id: 4606 + print_error("Canonical ID is required") 4607 + raise typer.Exit(1) 4608 + 4609 + # Check if duplicate_id already exists 4610 + duplicates = 
git_store.get_duplicates() 4611 + if duplicates.is_duplicate(duplicate_id): 4612 + existing_canonical = duplicates.get_canonical(duplicate_id) 4613 + print_error(f"Duplicate ID already mapped to: {existing_canonical}") 4614 + print_error("Use 'remove' first to change the mapping") 4615 + raise typer.Exit(1) 4616 + 4617 + # Check if we're trying to make a canonical ID point to itself 4618 + if duplicate_id == canonical_id: 4619 + print_error("Duplicate ID cannot be the same as canonical ID") 4620 + raise typer.Exit(1) 4621 + 4622 + # Add the mapping 4623 + git_store.add_duplicate(duplicate_id, canonical_id) 4624 + 4625 + # Commit changes 4626 + git_store.commit_changes(f"Add duplicate mapping: {duplicate_id} -> {canonical_id}") 4627 + 4628 + print_success(f"Added duplicate mapping: {duplicate_id} -> {canonical_id}") 4629 + 4630 + 4631 + def remove_duplicate(git_store: GitStore, duplicate_id: Optional[str]) -> None: 4632 + """Remove a duplicate mapping.""" 4633 + if not duplicate_id: 4634 + print_error("Duplicate ID is required") 4635 + raise typer.Exit(1) 4636 + 4637 + # Check if mapping exists 4638 + duplicates = git_store.get_duplicates() 4639 + if not duplicates.is_duplicate(duplicate_id): 4640 + print_error(f"No duplicate mapping found for: {duplicate_id}") 4641 + raise typer.Exit(1) 4642 + 4643 + canonical_id = duplicates.get_canonical(duplicate_id) 4644 + 4645 + # Remove the mapping 4646 + if git_store.remove_duplicate(duplicate_id): 4647 + # Commit changes 4648 + git_store.commit_changes(f"Remove duplicate mapping: {duplicate_id} -> {canonical_id}") 4649 + print_success(f"Removed duplicate mapping: {duplicate_id} -> {canonical_id}") 4650 + else: 4651 + print_error(f"Failed to remove duplicate mapping: {duplicate_id}") 4652 + raise typer.Exit(1) 4653 + </file> 4654 + 4655 + <file path="src/thicket/cli/commands/sync.py"> 4656 + """Sync command for thicket.""" 4657 + 4658 + import asyncio 4659 + from pathlib import Path 4660 + from typing import Optional 4661 + 
4662 + import typer 4663 + from rich.progress import track 4664 + 4665 + from ...core.feed_parser import FeedParser 4666 + from ...core.git_store import GitStore 4667 + from ..main import app 4668 + from ..utils import ( 4669 + load_config, 4670 + print_error, 4671 + print_info, 4672 + print_success, 4673 + ) 4674 + 4675 + 4676 + @app.command() 4677 + def sync( 4678 + all_users: bool = typer.Option( 4679 + False, "--all", "-a", help="Sync all users and feeds" 4680 + ), 4681 + user: Optional[str] = typer.Option( 4682 + None, "--user", "-u", help="Sync specific user only" 4683 + ), 4684 + config_file: Optional[Path] = typer.Option( 4685 + Path("thicket.yaml"), "--config", help="Configuration file path" 4686 + ), 4687 + dry_run: bool = typer.Option( 4688 + False, "--dry-run", help="Show what would be synced without making changes" 4689 + ), 4690 + ) -> None: 4691 + """Sync feeds and store entries in Git repository.""" 4692 + 4693 + # Load configuration 4694 + config = load_config(config_file) 4695 + 4696 + # Initialize Git store 4697 + git_store = GitStore(config.git_store) 4698 + 4699 + # Determine which users to sync from git repository 4700 + users_to_sync = [] 4701 + if all_users: 4702 + index = git_store._load_index() 4703 + users_to_sync = list(index.users.values()) 4704 + elif user: 4705 + user_metadata = git_store.get_user(user) 4706 + if not user_metadata: 4707 + print_error(f"User '{user}' not found in git repository") 4708 + raise typer.Exit(1) 4709 + users_to_sync = [user_metadata] 4710 + else: 4711 + print_error("Specify --all to sync all users or --user to sync a specific user") 4712 + raise typer.Exit(1) 4713 + 4714 + if not users_to_sync: 4715 + print_info("No users configured to sync") 4716 + return 4717 + 4718 + # Sync each user 4719 + total_new_entries = 0 4720 + total_updated_entries = 0 4721 + 4722 + for user_metadata in users_to_sync: 4723 + print_info(f"Syncing user: {user_metadata.username}") 4724 + 4725 + user_new_entries = 0 4726 + 
user_updated_entries = 0 4727 + 4728 + # Sync each feed for the user 4729 + for feed_url in track(user_metadata.feeds, description=f"Syncing {user_metadata.username}'s feeds"): 4730 + try: 4731 + new_entries, updated_entries = asyncio.run( 4732 + sync_feed(git_store, user_metadata.username, feed_url, dry_run) 4733 + ) 4734 + user_new_entries += new_entries 4735 + user_updated_entries += updated_entries 4736 + 4737 + except Exception as e: 4738 + print_error(f"Failed to sync feed {feed_url}: {e}") 4739 + continue 4740 + 4741 + print_info(f"User {user_metadata.username}: {user_new_entries} new, {user_updated_entries} updated") 4742 + total_new_entries += user_new_entries 4743 + total_updated_entries += user_updated_entries 4744 + 4745 + # Commit changes if not dry run 4746 + if not dry_run and (total_new_entries > 0 or total_updated_entries > 0): 4747 + commit_message = f"Sync feeds: {total_new_entries} new entries, {total_updated_entries} updated" 4748 + git_store.commit_changes(commit_message) 4749 + print_success(f"Committed changes: {commit_message}") 4750 + 4751 + # Summary 4752 + if dry_run: 4753 + print_info(f"Dry run complete: would sync {total_new_entries} new entries, {total_updated_entries} updated") 4754 + else: 4755 + print_success(f"Sync complete: {total_new_entries} new entries, {total_updated_entries} updated") 4756 + 4757 + 4758 + async def sync_feed(git_store: GitStore, username: str, feed_url, dry_run: bool) -> tuple[int, int]: 4759 + """Sync a single feed for a user.""" 4760 + 4761 + parser = FeedParser() 4762 + 4763 + try: 4764 + # Fetch and parse feed 4765 + content = await parser.fetch_feed(feed_url) 4766 + metadata, entries = parser.parse_feed(content, feed_url) 4767 + 4768 + new_entries = 0 4769 + updated_entries = 0 4770 + 4771 + # Process each entry 4772 + for entry in entries: 4773 + try: 4774 + # Check if entry already exists 4775 + existing_entry = git_store.get_entry(username, entry.id) 4776 + 4777 + if existing_entry: 4778 + # Check if 
entry has been updated 4779 + if existing_entry.updated != entry.updated: 4780 + if not dry_run: 4781 + git_store.store_entry(username, entry) 4782 + updated_entries += 1 4783 + else: 4784 + # New entry 4785 + if not dry_run: 4786 + git_store.store_entry(username, entry) 4787 + new_entries += 1 4788 + 4789 + except Exception as e: 4790 + print_error(f"Failed to process entry {entry.id}: {e}") 4791 + continue 4792 + 4793 + return new_entries, updated_entries 4794 + 4795 + except Exception as e: 4796 + print_error(f"Failed to sync feed {feed_url}: {e}") 4797 + return 0, 0 4798 + </file> 4799 + 4800 + <file path="src/thicket/models/config.py"> 4801 + """Configuration models for thicket.""" 4802 + 4803 + from pathlib import Path 4804 + from typing import Optional 4805 + 4806 + from pydantic import BaseModel, EmailStr, HttpUrl 4807 + from pydantic_settings import BaseSettings, SettingsConfigDict 4808 + 4809 + 4810 + class UserConfig(BaseModel): 4811 + """Configuration for a single user and their feeds.""" 4812 + 4813 + username: str 4814 + feeds: list[HttpUrl] 4815 + email: Optional[EmailStr] = None 4816 + homepage: Optional[HttpUrl] = None 4817 + icon: Optional[HttpUrl] = None 4818 + display_name: Optional[str] = None 4819 + 4820 + 4821 + class ThicketConfig(BaseSettings): 4822 + """Main configuration for thicket.""" 4823 + 4824 + model_config = SettingsConfigDict( 4825 + env_prefix="THICKET_", 4826 + env_file=".env", 4827 + yaml_file="thicket.yaml", 4828 + case_sensitive=False, 4829 + ) 4830 + 4831 + git_store: Path 4832 + cache_dir: Path 4833 + users: list[UserConfig] = [] 4834 + </file> 4835 + 4836 + <file path="src/thicket/cli/commands/links_cmd.py"> 4837 + """CLI command for extracting and categorizing all outbound links from blog entries.""" 4838 + 4839 + import json 4840 + import re 4841 + from pathlib import Path 4842 + from typing import Dict, List, Optional, Set 4843 + from urllib.parse import urljoin, urlparse 4844 + 4845 + import typer 4846 + from 
rich.console import Console 4847 + from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn 4848 + from rich.table import Table 4849 + 4850 + from ...core.git_store import GitStore 4851 + from ..main import app 4852 + from ..utils import load_config, get_tsv_mode 4853 + 4854 + console = Console() 4855 + 4856 + 4857 + class LinkData: 4858 + """Represents a link found in a blog entry.""" 4859 + 4860 + def __init__(self, url: str, entry_id: str, username: str): 4861 + self.url = url 4862 + self.entry_id = entry_id 4863 + self.username = username 4864 + 4865 + def to_dict(self) -> dict: 4866 + """Convert to dictionary for JSON serialization.""" 4867 + return { 4868 + "url": self.url, 4869 + "entry_id": self.entry_id, 4870 + "username": self.username 4871 + } 4872 + 4873 + @classmethod 4874 + def from_dict(cls, data: dict) -> "LinkData": 4875 + """Create from dictionary.""" 4876 + return cls( 4877 + url=data["url"], 4878 + entry_id=data["entry_id"], 4879 + username=data["username"] 4880 + ) 4881 + 4882 + 4883 + class LinkCategorizer: 4884 + """Categorizes links as internal, user, or unknown.""" 4885 + 4886 + def __init__(self, user_domains: Dict[str, Set[str]]): 4887 + self.user_domains = user_domains 4888 + # Create reverse mapping of domain -> username 4889 + self.domain_to_user = {} 4890 + for username, domains in user_domains.items(): 4891 + for domain in domains: 4892 + self.domain_to_user[domain] = username 4893 + 4894 + def categorize_url(self, url: str, source_username: str) -> tuple[str, Optional[str]]: 4895 + """ 4896 + Categorize a URL as 'internal', 'user', or 'unknown'. 4897 + Returns (category, target_username). 
4898 + """ 4899 + try: 4900 + parsed = urlparse(url) 4901 + domain = parsed.netloc.lower() 4902 + 4903 + # Check if it's a link to the same user's domain (internal) 4904 + if domain in self.user_domains.get(source_username, set()): 4905 + return "internal", source_username 4906 + 4907 + # Check if it's a link to another user's domain 4908 + if domain in self.domain_to_user: 4909 + return "user", self.domain_to_user[domain] 4910 + 4911 + # Everything else is unknown 4912 + return "unknown", None 4913 + 4914 + except Exception: 4915 + return "unknown", None 4916 + 4917 + 4918 + class LinkExtractor: 4919 + """Extracts and resolves links from blog entries.""" 4920 + 4921 + def __init__(self): 4922 + # Pattern for extracting links from HTML 4923 + self.link_pattern = re.compile(r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.IGNORECASE | re.DOTALL) 4924 + self.url_pattern = re.compile(r'https?://[^\s<>"]+') 4925 + 4926 + def extract_links_from_html(self, html_content: str, base_url: str) -> List[tuple[str, str]]: 4927 + """Extract all links from HTML content and resolve them against base URL.""" 4928 + links = [] 4929 + 4930 + # Extract links from <a> tags 4931 + for match in self.link_pattern.finditer(html_content): 4932 + url = match.group(1) 4933 + text = re.sub(r'<[^>]+>', '', match.group(2)).strip() # Remove HTML tags from link text 4934 + 4935 + # Resolve relative URLs against base URL 4936 + resolved_url = urljoin(base_url, url) 4937 + links.append((resolved_url, text)) 4938 + 4939 + return links 4940 + 4941 + 4942 + def extract_links_from_entry(self, entry, username: str, base_url: str) -> List[LinkData]: 4943 + """Extract all links from a blog entry.""" 4944 + links = [] 4945 + 4946 + # Combine all text content for analysis 4947 + content_to_search = [] 4948 + if entry.content: 4949 + content_to_search.append(entry.content) 4950 + if entry.summary: 4951 + content_to_search.append(entry.summary) 4952 + 4953 + for content in content_to_search: 4954 + extracted_links 
= self.extract_links_from_html(content, base_url) 4955 + 4956 + for url, link_text in extracted_links: 4957 + # Skip empty URLs 4958 + if not url or url.startswith('#'): 4959 + continue 4960 + 4961 + link_data = LinkData( 4962 + url=url, 4963 + entry_id=entry.id, 4964 + username=username 4965 + ) 4966 + 4967 + links.append(link_data) 4968 + 4969 + return links 4970 + 4971 + 4972 + @app.command() 4973 + def links( 4974 + config_file: Optional[Path] = typer.Option( 4975 + Path("thicket.yaml"), 4976 + "--config", 4977 + "-c", 4978 + help="Path to configuration file", 4979 + ), 4980 + output_file: Optional[Path] = typer.Option( 4981 + None, 4982 + "--output", 4983 + "-o", 4984 + help="Path to output unified links file (default: links.json in git store)", 4985 + ), 4986 + verbose: bool = typer.Option( 4987 + False, 4988 + "--verbose", 4989 + "-v", 4990 + help="Show detailed progress information", 4991 + ), 4992 + ) -> None: 4993 + """Extract and categorize all outbound links from blog entries. 4994 + 4995 + This command analyzes all blog entries to extract outbound links, 4996 + resolve them properly with respect to the feed's base URL, and 4997 + categorize them as internal, user, or unknown links. 4998 + 4999 + Creates a unified links.json file containing all link data. 
5000 + """ 5001 + try: 5002 + # Load configuration 5003 + config = load_config(config_file) 5004 + 5005 + # Initialize Git store 5006 + git_store = GitStore(config.git_store) 5007 + 5008 + # Build user domain mapping 5009 + if verbose: 5010 + console.print("Building user domain mapping...") 5011 + 5012 + index = git_store._load_index() 5013 + user_domains = {} 5014 + 5015 + for username, user_metadata in index.users.items(): 5016 + domains = set() 5017 + 5018 + # Add domains from feeds 5019 + for feed_url in user_metadata.feeds: 5020 + domain = urlparse(feed_url).netloc.lower() 5021 + if domain: 5022 + domains.add(domain) 5023 + 5024 + # Add domain from homepage 5025 + if user_metadata.homepage: 5026 + domain = urlparse(str(user_metadata.homepage)).netloc.lower() 5027 + if domain: 5028 + domains.add(domain) 5029 + 5030 + user_domains[username] = domains 5031 + 5032 + if verbose: 5033 + console.print(f"Found {len(user_domains)} users with {sum(len(d) for d in user_domains.values())} total domains") 5034 + 5035 + # Initialize components 5036 + link_extractor = LinkExtractor() 5037 + categorizer = LinkCategorizer(user_domains) 5038 + 5039 + # Get all users 5040 + users = list(index.users.keys()) 5041 + 5042 + if not users: 5043 + console.print("[yellow]No users found in Git store[/yellow]") 5044 + raise typer.Exit(0) 5045 + 5046 + # Process all entries 5047 + all_links = [] 5048 + link_categories = {"internal": [], "user": [], "unknown": []} 5049 + link_dict = {} # Dictionary with link URL as key, maps to list of atom IDs 5050 + reverse_dict = {} # Dictionary with atom ID as key, maps to list of URLs 5051 + 5052 + with Progress( 5053 + SpinnerColumn(), 5054 + TextColumn("[progress.description]{task.description}"), 5055 + BarColumn(), 5056 + TaskProgressColumn(), 5057 + console=console, 5058 + ) as progress: 5059 + 5060 + # Count total entries first 5061 + counting_task = progress.add_task("Counting entries...", total=len(users)) 5062 + total_entries = 0 5063 + 5064 + 
for username in users: 5065 + entries = git_store.list_entries(username) 5066 + total_entries += len(entries) 5067 + progress.advance(counting_task) 5068 + 5069 + progress.remove_task(counting_task) 5070 + 5071 + # Process entries 5072 + processing_task = progress.add_task( 5073 + f"Processing {total_entries} entries...", 5074 + total=total_entries 5075 + ) 5076 + 5077 + for username in users: 5078 + entries = git_store.list_entries(username) 5079 + user_metadata = index.users[username] 5080 + 5081 + # Get base URL for this user (use first feed URL) 5082 + base_url = str(user_metadata.feeds[0]) if user_metadata.feeds else "https://example.com" 5083 + 5084 + for entry in entries: 5085 + # Extract links from this entry 5086 + entry_links = link_extractor.extract_links_from_entry(entry, username, base_url) 5087 + 5088 + # Track unique links per entry 5089 + entry_urls_seen = set() 5090 + 5091 + # Categorize each link 5092 + for link_data in entry_links: 5093 + # Skip if we've already seen this URL in this entry 5094 + if link_data.url in entry_urls_seen: 5095 + continue 5096 + entry_urls_seen.add(link_data.url) 5097 + 5098 + category, target_username = categorizer.categorize_url(link_data.url, username) 5099 + 5100 + # Add to link dictionary (URL as key, maps to list of atom IDs) 5101 + if link_data.url not in link_dict: 5102 + link_dict[link_data.url] = [] 5103 + if link_data.entry_id not in link_dict[link_data.url]: 5104 + link_dict[link_data.url].append(link_data.entry_id) 5105 + 5106 + # Also add to reverse mapping (atom ID -> list of URLs) 5107 + if link_data.entry_id not in reverse_dict: 5108 + reverse_dict[link_data.entry_id] = [] 5109 + if link_data.url not in reverse_dict[link_data.entry_id]: 5110 + reverse_dict[link_data.entry_id].append(link_data.url) 5111 + 5112 + # Add category info to link data for categories tracking 5113 + link_info = link_data.to_dict() 5114 + link_info["category"] = category 5115 + link_info["target_username"] = target_username 5116 
+ 5117 + all_links.append(link_info) 5118 + link_categories[category].append(link_info) 5119 + 5120 + progress.advance(processing_task) 5121 + 5122 + if verbose and entry_links: 5123 + console.print(f" Found {len(entry_links)} links in {username}:{entry.title[:50]}...") 5124 + 5125 + # Determine output path 5126 + if output_file: 5127 + output_path = output_file 5128 + else: 5129 + output_path = config.git_store / "links.json" 5130 + 5131 + # Save all extracted links (not just filtered ones) 5132 + if verbose: 5133 + console.print("Preparing output data...") 5134 + 5135 + # Build a set of all URLs that correspond to posts in the git database 5136 + registered_urls = set() 5137 + 5138 + # Get all entries from all users and build URL mappings 5139 + for username in users: 5140 + entries = git_store.list_entries(username) 5141 + user_metadata = index.users[username] 5142 + 5143 + for entry in entries: 5144 + # Try to match entry URLs with extracted links 5145 + if hasattr(entry, 'link') and entry.link: 5146 + registered_urls.add(str(entry.link)) 5147 + 5148 + # Also check entry alternate links if they exist 5149 + if hasattr(entry, 'links') and entry.links: 5150 + for link in entry.links: 5151 + if hasattr(link, 'href') and link.href: 5152 + registered_urls.add(str(link.href)) 5153 + 5154 + # Build unified structure with metadata 5155 + unified_links = {} 5156 + reverse_mapping = {} 5157 + 5158 + for url, entry_ids in link_dict.items(): 5159 + unified_links[url] = { 5160 + "referencing_entries": entry_ids 5161 + } 5162 + 5163 + # Find target username if this is a tracked post (compare hosts, not substrings) 5164 + if url in registered_urls: 5165 + url_domain = urlparse(url).netloc.lower() 5166 + for username in users: 5167 + if url_domain in user_domains.get(username, set()): 5168 + unified_links[url]["target_username"] = username 5169 + break 5170 + 5171 + # Build reverse mapping 5172 + for entry_id in entry_ids: 5173 + if entry_id not in reverse_mapping: 5174 +
reverse_mapping[entry_id] = [] 5175 + if url not in reverse_mapping[entry_id]: 5176 + reverse_mapping[entry_id].append(url) 5177 + 5178 + # Create unified output data 5179 + output_data = { 5180 + "links": unified_links, 5181 + "reverse_mapping": reverse_mapping, 5182 + "user_domains": {k: list(v) for k, v in user_domains.items()} 5183 + } 5184 + 5185 + if verbose: 5186 + console.print(f"Found {len(registered_urls)} registered post URLs") 5187 + console.print(f"Found {len(link_dict)} total links, {sum(1 for link in unified_links.values() if 'target_username' in link)} tracked posts") 5188 + 5189 + # Save unified data 5190 + with open(output_path, "w") as f: 5191 + json.dump(output_data, f, indent=2, default=str) 5192 + 5193 + # Show summary 5194 + if not get_tsv_mode(): 5195 + console.print("\n[green]✓ Links extraction completed successfully[/green]") 5196 + 5197 + # Create summary table or TSV output 5198 + if get_tsv_mode(): 5199 + print("Category\tCount\tDescription") 5200 + print(f"Internal\t{len(link_categories['internal'])}\tLinks to same user's domain") 5201 + print(f"User\t{len(link_categories['user'])}\tLinks to other tracked users") 5202 + print(f"Unknown\t{len(link_categories['unknown'])}\tLinks to external sites") 5203 + print(f"Total Extracted\t{len(all_links)}\tAll extracted links") 5204 + print(f"Saved to Output\t{len(output_data['links'])}\tLinks saved to output file") 5205 + print(f"Cross-references\t{sum(1 for link in unified_links.values() if 'target_username' in link)}\tLinks to registered posts only") 5206 + else: 5207 + table = Table(title="Links Summary") 5208 + table.add_column("Category", style="cyan") 5209 + table.add_column("Count", style="green") 5210 + table.add_column("Description", style="white") 5211 + 5212 + table.add_row("Internal", str(len(link_categories["internal"])), "Links to same user's domain") 5213 + table.add_row("User", str(len(link_categories["user"])), "Links to other tracked users") 5214 + table.add_row("Unknown", 
str(len(link_categories["unknown"])), "Links to external sites") 5215 + table.add_row("Total Extracted", str(len(all_links)), "All extracted links") 5216 + table.add_row("Saved to Output", str(len(output_data['links'])), "Links saved to output file") 5217 + table.add_row("Cross-references", str(sum(1 for link in unified_links.values() if 'target_username' in link)), "Links to registered posts only") 5218 + 5219 + console.print(table) 5220 + 5221 + # Show user links if verbose 5222 + if verbose and link_categories["user"]: 5223 + if get_tsv_mode(): 5224 + print("User Link Source\tUser Link Target\tLink Count") 5225 + user_link_counts = {} 5226 + 5227 + for link in link_categories["user"]: 5228 + key = f"{link['username']} -> {link['target_username']}" 5229 + user_link_counts[key] = user_link_counts.get(key, 0) + 1 5230 + 5231 + for link_pair, count in sorted(user_link_counts.items(), key=lambda x: x[1], reverse=True)[:10]: 5232 + source, target = link_pair.split(" -> ") 5233 + print(f"{source}\t{target}\t{count}") 5234 + else: 5235 + console.print("\n[bold]User-to-user links:[/bold]") 5236 + user_link_counts = {} 5237 + 5238 + for link in link_categories["user"]: 5239 + key = f"{link['username']} -> {link['target_username']}" 5240 + user_link_counts[key] = user_link_counts.get(key, 0) + 1 5241 + 5242 + for link_pair, count in sorted(user_link_counts.items(), key=lambda x: x[1], reverse=True)[:10]: 5243 + console.print(f" {link_pair}: {count} links") 5244 + 5245 + if not get_tsv_mode(): 5246 + console.print(f"\nUnified links data saved to: {output_path}") 5247 + 5248 + except Exception as e: 5249 + console.print(f"[red]Error extracting links: {e}[/red]") 5250 + if verbose: 5251 + console.print_exception() 5252 + raise typer.Exit(1) 5253 + </file> 5254 + 5255 + <file path="src/thicket/cli/commands/list_cmd.py"> 5256 + """List command for thicket.""" 5257 + 5258 + import re 5259 + from pathlib import Path 5260 + from typing import Optional 5261 + 5262 + import typer 
5263 + from rich.table import Table 5264 + 5265 + from ...core.git_store import GitStore 5266 + from ..main import app 5267 + from ..utils import ( 5268 + console, 5269 + load_config, 5270 + print_error, 5271 + print_feeds_table, 5272 + print_feeds_table_from_git, 5273 + print_info, 5274 + print_users_table, 5275 + print_users_table_from_git, 5276 + print_entries_tsv, 5277 + get_tsv_mode, 5278 + ) 5279 + 5280 + 5281 + @app.command("list") 5282 + def list_command( 5283 + what: str = typer.Argument(..., help="What to list: 'users', 'feeds', 'entries'"), 5284 + user: Optional[str] = typer.Option( 5285 + None, "--user", "-u", help="Filter by specific user" 5286 + ), 5287 + limit: Optional[int] = typer.Option( 5288 + None, "--limit", "-l", help="Limit number of results" 5289 + ), 5290 + config_file: Optional[Path] = typer.Option( 5291 + Path("thicket.yaml"), "--config", help="Configuration file path" 5292 + ), 5293 + ) -> None: 5294 + """List users, feeds, or entries.""" 5295 + 5296 + # Load configuration 5297 + config = load_config(config_file) 5298 + 5299 + # Initialize Git store 5300 + git_store = GitStore(config.git_store) 5301 + 5302 + if what == "users": 5303 + list_users(git_store) 5304 + elif what == "feeds": 5305 + list_feeds(git_store, user) 5306 + elif what == "entries": 5307 + list_entries(git_store, user, limit) 5308 + else: 5309 + print_error(f"Unknown list type: {what}") 5310 + print_error("Use 'users', 'feeds', or 'entries'") 5311 + raise typer.Exit(1) 5312 + 5313 + 5314 + def list_users(git_store: GitStore) -> None: 5315 + """List all users.""" 5316 + index = git_store._load_index() 5317 + users = list(index.users.values()) 5318 + 5319 + if not users: 5320 + print_info("No users configured") 5321 + return 5322 + 5323 + print_users_table_from_git(users) 5324 + 5325 + 5326 + def list_feeds(git_store: GitStore, username: Optional[str] = None) -> None: 5327 + """List feeds, optionally filtered by user.""" 5328 + if username: 5329 + user = 
git_store.get_user(username) 5330 + if not user: 5331 + print_error(f"User '{username}' not found") 5332 + raise typer.Exit(1) 5333 + 5334 + if not user.feeds: 5335 + print_info(f"No feeds configured for user '{username}'") 5336 + return 5337 + 5338 + print_feeds_table_from_git(git_store, username) 5339 + 5340 + 5341 + def list_entries(git_store: GitStore, username: Optional[str] = None, limit: Optional[int] = None) -> None: 5342 + """List entries, optionally filtered by user.""" 5343 + 5344 + if username: 5345 + # List entries for specific user 5346 + user = git_store.get_user(username) 5347 + if not user: 5348 + print_error(f"User '{username}' not found") 5349 + raise typer.Exit(1) 5350 + 5351 + entries = git_store.list_entries(username, limit) 5352 + if not entries: 5353 + print_info(f"No entries found for user '{username}'") 5354 + return 5355 + 5356 + print_entries_table([entries], [username]) 5357 + 5358 + else: 5359 + # List entries for all users 5360 + all_entries = [] 5361 + all_usernames = [] 5362 + 5363 + index = git_store._load_index() 5364 + for user in index.users.values(): 5365 + entries = git_store.list_entries(user.username, limit) 5366 + if entries: 5367 + all_entries.append(entries) 5368 + all_usernames.append(user.username) 5369 + 5370 + if not all_entries: 5371 + print_info("No entries found") 5372 + return 5373 + 5374 + print_entries_table(all_entries, all_usernames) 5375 + 5376 + 5377 + def _clean_html_content(content: Optional[str]) -> str: 5378 + """Clean HTML content for display in table.""" 5379 + if not content: 5380 + return "" 5381 + 5382 + # Remove HTML tags 5383 + clean_text = re.sub(r'<[^>]+>', ' ', content) 5384 + # Replace multiple whitespace with single space 5385 + clean_text = re.sub(r'\s+', ' ', clean_text) 5386 + # Strip and limit length 5387 + clean_text = clean_text.strip() 5388 + if len(clean_text) > 100: 5389 + clean_text = clean_text[:97] + "..." 
5390 + 5391 + return clean_text 5392 + 5393 + 5394 + def print_entries_table(entries_by_user: list[list], usernames: list[str]) -> None: 5395 + """Print a table of entries.""" 5396 + if get_tsv_mode(): 5397 + print_entries_tsv(entries_by_user, usernames) 5398 + return 5399 + 5400 + table = Table(title="Feed Entries") 5401 + table.add_column("User", style="cyan", no_wrap=True) 5402 + table.add_column("Title", style="bold") 5403 + table.add_column("Updated", style="blue") 5404 + table.add_column("URL", style="green") 5405 + 5406 + # Combine all entries with usernames 5407 + all_entries = [] 5408 + for entries, username in zip(entries_by_user, usernames): 5409 + for entry in entries: 5410 + all_entries.append((username, entry)) 5411 + 5412 + # Sort by updated time (newest first) 5413 + all_entries.sort(key=lambda x: x[1].updated, reverse=True) 5414 + 5415 + for username, entry in all_entries: 5416 + # Format updated time 5417 + updated_str = entry.updated.strftime("%Y-%m-%d %H:%M") 5418 + 5419 + # Truncate title if too long 5420 + title = entry.title 5421 + if len(title) > 50: 5422 + title = title[:47] + "..." 5423 + 5424 + table.add_row( 5425 + username, 5426 + title, 5427 + updated_str, 5428 + str(entry.link), 5429 + ) 5430 + 5431 + console.print(table) 5432 + </file> 5433 + 5434 + <file path="src/thicket/cli/main.py"> 5435 + """Main CLI application using Typer.""" 5436 + 5437 + import typer 5438 + from rich.console import Console 5439 + 5440 + from .. 
import __version__ 5441 + 5442 + app = typer.Typer( 5443 + name="thicket", 5444 + help="A CLI tool for persisting Atom/RSS feeds in Git repositories", 5445 + no_args_is_help=True, 5446 + rich_markup_mode="rich", 5447 + ) 5448 + 5449 + console = Console() 5450 + 5451 + # Global state for TSV output mode 5452 + tsv_mode = False 5453 + 5454 + 5455 + def version_callback(value: bool) -> None: 5456 + """Show version and exit.""" 5457 + if value: 5458 + console.print(f"thicket version {__version__}") 5459 + raise typer.Exit() 5460 + 5461 + 5462 + @app.callback() 5463 + def main( 5464 + version: bool = typer.Option( 5465 + None, 5466 + "--version", 5467 + "-v", 5468 + help="Show the version and exit", 5469 + callback=version_callback, 5470 + is_eager=True, 5471 + ), 5472 + tsv: bool = typer.Option( 5473 + False, 5474 + "--tsv", 5475 + help="Output in tab-separated values format without truncation", 5476 + ), 5477 + ) -> None: 5478 + """Thicket: A CLI tool for persisting Atom/RSS feeds in Git repositories.""" 5479 + global tsv_mode 5480 + tsv_mode = tsv 5481 + 5482 + 5483 + # Import commands to register them 5484 + from .commands import add, duplicates, generate, index_cmd, info_cmd, init, links_cmd, list_cmd, sync 5485 + 5486 + if __name__ == "__main__": 5487 + app() 5488 + </file> 5489 + 5490 + <file path="src/thicket/core/git_store.py"> 5491 + """Git repository operations for thicket.""" 5492 + 5493 + import json 5494 + from datetime import datetime 5495 + from pathlib import Path 5496 + from typing import Optional 5497 + 5498 + import git 5499 + from git import Repo 5500 + 5501 + from ..models import AtomEntry, DuplicateMap, GitStoreIndex, UserMetadata 5502 + 5503 + 5504 + class GitStore: 5505 + """Manages the Git repository for storing feed entries.""" 5506 + 5507 + def __init__(self, repo_path: Path): 5508 + """Initialize the Git store.""" 5509 + self.repo_path = repo_path 5510 + self.repo: Optional[Repo] = None 5511 + self._ensure_repo() 5512 + 5513 + def 
_ensure_repo(self) -> None: 5514 + """Ensure the Git repository exists and is initialized.""" 5515 + if not self.repo_path.exists(): 5516 + self.repo_path.mkdir(parents=True, exist_ok=True) 5517 + 5518 + try: 5519 + self.repo = Repo(self.repo_path) 5520 + except git.InvalidGitRepositoryError: 5521 + # Initialize new repository 5522 + self.repo = Repo.init(self.repo_path) 5523 + self._create_initial_structure() 5524 + 5525 + def _create_initial_structure(self) -> None: 5526 + """Create initial Git store structure.""" 5527 + # Create index.json 5528 + index = GitStoreIndex( 5529 + created=datetime.now(), 5530 + last_updated=datetime.now(), 5531 + ) 5532 + self._save_index(index) 5533 + 5534 + # Create duplicates.json 5535 + duplicates = DuplicateMap() 5536 + self._save_duplicates(duplicates) 5537 + 5538 + # Create initial commit 5539 + self.repo.index.add(["index.json", "duplicates.json"]) 5540 + self.repo.index.commit("Initial thicket repository structure") 5541 + 5542 + def _save_index(self, index: GitStoreIndex) -> None: 5543 + """Save the index to index.json.""" 5544 + index_path = self.repo_path / "index.json" 5545 + with open(index_path, "w") as f: 5546 + json.dump(index.model_dump(mode="json", exclude_none=True), f, indent=2, default=str) 5547 + 5548 + def _load_index(self) -> GitStoreIndex: 5549 + """Load the index from index.json.""" 5550 + index_path = self.repo_path / "index.json" 5551 + if not index_path.exists(): 5552 + return GitStoreIndex( 5553 + created=datetime.now(), 5554 + last_updated=datetime.now(), 5555 + ) 5556 + 5557 + with open(index_path) as f: 5558 + data = json.load(f) 5559 + 5560 + return GitStoreIndex(**data) 5561 + 5562 + def _save_duplicates(self, duplicates: DuplicateMap) -> None: 5563 + """Save duplicates map to duplicates.json.""" 5564 + duplicates_path = self.repo_path / "duplicates.json" 5565 + with open(duplicates_path, "w") as f: 5566 + json.dump(duplicates.model_dump(exclude_none=True), f, indent=2) 5567 + 5568 + def 
_load_duplicates(self) -> DuplicateMap: 5569 + """Load duplicates map from duplicates.json.""" 5570 + duplicates_path = self.repo_path / "duplicates.json" 5571 + if not duplicates_path.exists(): 5572 + return DuplicateMap() 5573 + 5574 + with open(duplicates_path) as f: 5575 + data = json.load(f) 5576 + 5577 + return DuplicateMap(**data) 5578 + 5579 + def add_user(self, username: str, display_name: Optional[str] = None, 5580 + email: Optional[str] = None, homepage: Optional[str] = None, 5581 + icon: Optional[str] = None, feeds: Optional[list[str]] = None) -> UserMetadata: 5582 + """Add a new user to the Git store.""" 5583 + index = self._load_index() 5584 + 5585 + # Create user directory 5586 + user_dir = self.repo_path / username 5587 + user_dir.mkdir(exist_ok=True) 5588 + 5589 + # Create user metadata 5590 + user_metadata = UserMetadata( 5591 + username=username, 5592 + display_name=display_name, 5593 + email=email, 5594 + homepage=homepage, 5595 + icon=icon, 5596 + feeds=feeds or [], 5597 + directory=username, 5598 + created=datetime.now(), 5599 + last_updated=datetime.now(), 5600 + ) 5601 + 5602 + 5603 + # Update index 5604 + index.add_user(user_metadata) 5605 + self._save_index(index) 5606 + 5607 + return user_metadata 5608 + 5609 + def get_user(self, username: str) -> Optional[UserMetadata]: 5610 + """Get user metadata by username.""" 5611 + index = self._load_index() 5612 + return index.get_user(username) 5613 + 5614 + def update_user(self, username: str, **kwargs) -> bool: 5615 + """Update user metadata.""" 5616 + index = self._load_index() 5617 + user = index.get_user(username) 5618 + 5619 + if not user: 5620 + return False 5621 + 5622 + # Update user metadata 5623 + for key, value in kwargs.items(): 5624 + if hasattr(user, key) and value is not None: 5625 + setattr(user, key, value) 5626 + 5627 + user.update_timestamp() 5628 + 5629 + 5630 + # Update index 5631 + index.add_user(user) 5632 + self._save_index(index) 5633 + 5634 + return True 5635 + 5636 + 
def store_entry(self, username: str, entry: AtomEntry) -> bool: 5637 + """Store an entry in the user's directory.""" 5638 + user = self.get_user(username) 5639 + if not user: 5640 + return False 5641 + 5642 + # Sanitize entry ID for filename 5643 + from .feed_parser import FeedParser 5644 + parser = FeedParser() 5645 + safe_id = parser.sanitize_entry_id(entry.id) 5646 + 5647 + # Create entry file 5648 + user_dir = self.repo_path / user.directory 5649 + entry_path = user_dir / f"{safe_id}.json" 5650 + 5651 + # Check if entry already exists 5652 + entry_exists = entry_path.exists() 5653 + 5654 + # Save entry 5655 + with open(entry_path, "w") as f: 5656 + json.dump(entry.model_dump(mode="json", exclude_none=True), f, indent=2, default=str) 5657 + 5658 + # Update user metadata if new entry 5659 + if not entry_exists: 5660 + index = self._load_index() 5661 + index.update_entry_count(username, 1) 5662 + self._save_index(index) 5663 + 5664 + return True 5665 + 5666 + def get_entry(self, username: str, entry_id: str) -> Optional[AtomEntry]: 5667 + """Get an entry by username and entry ID.""" 5668 + user = self.get_user(username) 5669 + if not user: 5670 + return None 5671 + 5672 + # Sanitize entry ID 5673 + from .feed_parser import FeedParser 5674 + parser = FeedParser() 5675 + safe_id = parser.sanitize_entry_id(entry_id) 5676 + 5677 + entry_path = self.repo_path / user.directory / f"{safe_id}.json" 5678 + if not entry_path.exists(): 5679 + return None 5680 + 5681 + with open(entry_path) as f: 5682 + data = json.load(f) 5683 + 5684 + return AtomEntry(**data) 5685 + 5686 + def list_entries(self, username: str, limit: Optional[int] = None) -> list[AtomEntry]: 5687 + """List entries for a user.""" 5688 + user = self.get_user(username) 5689 + if not user: 5690 + return [] 5691 + 5692 + user_dir = self.repo_path / user.directory 5693 + if not user_dir.exists(): 5694 + return [] 5695 + 5696 + entries = [] 5697 + entry_files = sorted(user_dir.glob("*.json"), key=lambda p: 
p.stat().st_mtime, reverse=True) 5698 + 5699 + 5700 + if limit: 5701 + entry_files = entry_files[:limit] 5702 + 5703 + for entry_file in entry_files: 5704 + try: 5705 + with open(entry_file) as f: 5706 + data = json.load(f) 5707 + entries.append(AtomEntry(**data)) 5708 + except Exception: 5709 + # Skip invalid entries 5710 + continue 5711 + 5712 + return entries 5713 + 5714 + def get_duplicates(self) -> DuplicateMap: 5715 + """Get the duplicates map.""" 5716 + return self._load_duplicates() 5717 + 5718 + def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None: 5719 + """Add a duplicate mapping.""" 5720 + duplicates = self._load_duplicates() 5721 + duplicates.add_duplicate(duplicate_id, canonical_id) 5722 + self._save_duplicates(duplicates) 5723 + 5724 + def remove_duplicate(self, duplicate_id: str) -> bool: 5725 + """Remove a duplicate mapping.""" 5726 + duplicates = self._load_duplicates() 5727 + result = duplicates.remove_duplicate(duplicate_id) 5728 + self._save_duplicates(duplicates) 5729 + return result 5730 + 5731 + def commit_changes(self, message: str) -> None: 5732 + """Commit all changes to the Git repository.""" 5733 + if not self.repo: 5734 + return 5735 + 5736 + # Add all changes 5737 + self.repo.git.add(A=True) 5738 + 5739 + # Commit if the repo has no HEAD yet (initial commit) or staged changes exist 5740 + if not self.repo.head.is_valid() or self.repo.index.diff("HEAD"): 5741 + self.repo.index.commit(message) 5742 + 5743 + def get_stats(self) -> dict: 5744 + """Get statistics about the Git store.""" 5745 + index = self._load_index() 5746 + duplicates = self._load_duplicates() 5747 + 5748 + return { 5749 + "total_users": len(index.users), 5750 + "total_entries": index.total_entries, 5751 + "total_duplicates": len(duplicates.duplicates), 5752 + "last_updated": index.last_updated, 5753 + "repository_size": sum(f.stat().st_size for f in self.repo_path.rglob("*") if f.is_file()), 5754 + } 5755 + 5756 + def search_entries(self, query: str, username: Optional[str] = None, 5757 + limit: Optional[int] = None) -> 
list[tuple[str, AtomEntry]]: 5758 + """Search entries by content.""" 5759 + results = [] 5760 + 5761 + # Get users to search 5762 + index = self._load_index() 5763 + users = [index.get_user(username)] if username else list(index.users.values()) 5764 + users = [u for u in users if u is not None] 5765 + 5766 + for user in users: 5767 + user_dir = self.repo_path / user.directory 5768 + if not user_dir.exists(): 5769 + continue 5770 + 5771 + entry_files = user_dir.glob("*.json") 5772 + 5773 + for entry_file in entry_files: 5774 + try: 5775 + with open(entry_file) as f: 5776 + data = json.load(f) 5777 + 5778 + entry = AtomEntry(**data) 5779 + 5780 + # Simple text search in title, summary, and content 5781 + searchable_text = " ".join(filter(None, [ 5782 + entry.title, 5783 + entry.summary or "", 5784 + entry.content or "", 5785 + ])).lower() 5786 + 5787 + if query.lower() in searchable_text: 5788 + results.append((user.username, entry)) 5789 + 5790 + if limit and len(results) >= limit: 5791 + return results 5792 + 5793 + except Exception: 5794 + # Skip invalid entries 5795 + continue 5796 + 5797 + # Sort by updated time (newest first) 5798 + results.sort(key=lambda x: x[1].updated, reverse=True) 5799 + 5800 + return results[:limit] if limit else results 5801 + </file> 5802 + 5803 + <file path="ARCH.md"> 5804 + # Thicket Architecture Design 5805 + 5806 + ## Overview 5807 + Thicket is a modern CLI tool for persisting Atom/RSS feeds in a Git repository, designed to enable distributed weblog comment structures. 
5808 + 5809 + ## Technology Stack 5810 + 5811 + ### Core Libraries 5812 + 5813 + #### CLI Framework 5814 + - **Typer** (0.15.x) - Modern CLI framework with type hints 5815 + - **Rich** (13.x) - Beautiful terminal output, progress bars, and tables 5816 + - **prompt-toolkit** - Interactive prompts when needed 5817 + 5818 + #### Feed Processing 5819 + - **feedparser** (6.0.11) - Universal feed parser supporting RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 5820 + - Alternative: **atoma** for stricter Atom/RSS parsing with JSON feed support 5821 + - Alternative: **fastfeedparser** for high-performance parsing (10x faster) 5822 + 5823 + #### Git Integration 5824 + - **GitPython** (3.1.44) - High-level git operations, requires git CLI 5825 + - Alternative: **pygit2** (1.18.0) - Direct libgit2 bindings, better for authentication 5826 + 5827 + #### HTTP Client 5828 + - **httpx** (0.28.x) - Modern async/sync HTTP client with connection pooling 5829 + - **aiohttp** (3.11.x) - For async-only operations if needed 5830 + 5831 + #### Configuration & Data Models 5832 + - **pydantic** (2.11.x) - Data validation and settings management 5833 + - **pydantic-settings** (2.10.x) - Configuration file handling with env var support 5834 + 5835 + #### Utilities 5836 + - **pendulum** (3.x) - Better datetime handling 5837 + - **bleach** (6.x) - HTML sanitization for feed content 5838 + - **platformdirs** (4.x) - Cross-platform directory paths 5839 + 5840 + ## Project Structure 5841 + 5842 + ``` 5843 + thicket/ 5844 + ├── pyproject.toml # Modern Python packaging 5845 + ├── README.md # Project documentation 5846 + ├── ARCH.md # This file 5847 + ├── CLAUDE.md # Project instructions 5848 + ├── .gitignore 5849 + ├── src/ 5850 + │ └── thicket/ 5851 + │ ├── __init__.py 5852 + │ ├── __main__.py # Entry point for `python -m thicket` 5853 + │ ├── cli/ # CLI commands and interface 5854 + │ │ ├── __init__.py 5855 + │ │ ├── main.py # Main CLI app with Typer 5856 + │ │ ├── commands/ # 
Subcommands 5857 + │ │ │ ├── __init__.py 5858 + │ │ │ ├── init.py # Initialize git store 5859 + │ │ │ ├── add.py # Add users and feeds 5860 + │ │ │ ├── sync.py # Sync feeds 5861 + │ │ │ ├── list_cmd.py # List users/feeds 5862 + │ │ │ ├── duplicates.py # Manage duplicate entries 5863 + │ │ │ ├── links_cmd.py # Extract and categorize links 5864 + │ │ │ └── index_cmd.py # Build reference index and show threads 5865 + │ │ └── utils.py # CLI utilities (progress, formatting) 5866 + │ ├── core/ # Core business logic 5867 + │ │ ├── __init__.py 5868 + │ │ ├── feed_parser.py # Feed parsing and normalization 5869 + │ │ ├── git_store.py # Git repository operations 5870 + │ │ └── reference_parser.py # Link extraction and threading 5871 + │ ├── models/ # Pydantic data models 5872 + │ │ ├── __init__.py 5873 + │ │ ├── config.py # Configuration models 5874 + │ │ ├── feed.py # Feed/Entry models 5875 + │ │ └── user.py # User metadata models 5876 + │ └── utils/ # Shared utilities 5877 + │ └── __init__.py 5878 + ├── tests/ 5879 + │ ├── __init__.py 5880 + │ ├── conftest.py # pytest configuration 5881 + │ ├── test_feed_parser.py 5882 + │ ├── test_git_store.py 5883 + │ └── fixtures/ # Test data 5884 + │ └── feeds/ 5885 + └── docs/ 5886 + └── examples/ # Example configurations 5887 + ``` 5888 + 5889 + ## Data Models 5890 + 5891 + ### Configuration File (YAML/TOML) 5892 + ```python 5893 + class ThicketConfig(BaseSettings): 5894 + git_store: Path # Git repository location 5895 + cache_dir: Path # Cache directory 5896 + users: list[UserConfig] 5897 + 5898 + model_config = SettingsConfigDict( 5899 + env_prefix="THICKET_", 5900 + env_file=".env", 5901 + yaml_file="thicket.yaml" 5902 + ) 5903 + 5904 + class UserConfig(BaseModel): 5905 + username: str 5906 + feeds: list[HttpUrl] 5907 + email: Optional[EmailStr] = None 5908 + homepage: Optional[HttpUrl] = None 5909 + icon: Optional[HttpUrl] = None 5910 + display_name: Optional[str] = None 5911 + ``` 5912 + 5913 + ### Feed Storage Format 5914 + 
```python 5915 + class AtomEntry(BaseModel): 5916 + id: str # Original Atom ID 5917 + title: str 5918 + link: HttpUrl 5919 + updated: datetime 5920 + published: Optional[datetime] 5921 + summary: Optional[str] 5922 + content: Optional[str] # Full body content from Atom entry 5923 + content_type: Optional[str] = "html" # text, html, xhtml 5924 + author: Optional[dict] 5925 + categories: list[str] = [] 5926 + rights: Optional[str] = None # Copyright info 5927 + source: Optional[str] = None # Source feed URL 5928 + # Additional Atom fields preserved during RSS->Atom conversion 5929 + 5930 + model_config = ConfigDict( 5931 + json_encoders={ 5932 + datetime: lambda v: v.isoformat() 5933 + } 5934 + ) 5935 + 5936 + class DuplicateMap(BaseModel): 5937 + """Maps duplicate entry IDs to canonical entry IDs""" 5938 + duplicates: dict[str, str] = {} # duplicate_id -> canonical_id 5939 + comment: str = "Entry IDs that map to the same canonical content" 5940 + 5941 + def add_duplicate(self, duplicate_id: str, canonical_id: str) -> None: 5942 + """Add a duplicate mapping""" 5943 + self.duplicates[duplicate_id] = canonical_id 5944 + 5945 + def remove_duplicate(self, duplicate_id: str) -> bool: 5946 + """Remove a duplicate mapping. 
Returns True if existed.""" 5947 + return self.duplicates.pop(duplicate_id, None) is not None 5948 + 5949 + def get_canonical(self, entry_id: str) -> str: 5950 + """Get canonical ID for an entry (returns original if not duplicate)""" 5951 + return self.duplicates.get(entry_id, entry_id) 5952 + 5953 + def is_duplicate(self, entry_id: str) -> bool: 5954 + """Check if entry ID is marked as duplicate""" 5955 + return entry_id in self.duplicates 5956 + ``` 5957 + 5958 + ## Git Repository Structure 5959 + ``` 5960 + git-store/ 5961 + ├── index.json # User directory index 5962 + ├── duplicates.json # Manual curation of duplicate entries 5963 + ├── links.json # Unified links, references, and mapping data 5964 + ├── user1/ 5965 + │ ├── entry_id_1.json # Sanitized entry files 5966 + │ ├── entry_id_2.json 5967 + │ └── ... 5968 + └── user2/ 5969 + └── ... 5970 + ``` 5971 + 5972 + ## Key Design Decisions 5973 + 5974 + ### 1. Feed Normalization & Auto-Discovery 5975 + - All RSS feeds converted to Atom format before storage 5976 + - Preserves maximum metadata during conversion 5977 + - Sanitizes HTML content to prevent XSS 5978 + - **Auto-discovery**: Extracts user metadata from feed during `add user` command 5979 + 5980 + ### 2. ID Sanitization 5981 + - Consistent algorithm to convert Atom IDs to safe filenames 5982 + - Handles edge cases (very long IDs, special characters) 5983 + - Maintains reversibility where possible 5984 + 5985 + ### 3. Git Operations 5986 + - Uses GitPython for simplicity (no authentication required) 5987 + - Single main branch for all users and entries 5988 + - Atomic commits per sync operation 5989 + - Meaningful commit messages with feed update summaries 5990 + - Preserves complete history - never delete entries even if they disappear from feeds 5991 + 5992 + ### 4. 
Caching Strategy 5993 + - HTTP caching with Last-Modified/ETag support 5994 + - Local cache of parsed feeds with TTL 5995 + - Cache invalidation on configuration changes 5996 + - Git store serves as permanent historical archive beyond feed depth limits 5997 + 5998 + ### 5. Error Handling 5999 + - Graceful handling of feed parsing errors 6000 + - Retry logic for network failures 6001 + - Clear error messages with recovery suggestions 6002 + 6003 + ## CLI Command Structure 6004 + 6005 + ```bash 6006 + # Initialize a new git store 6007 + thicket init /path/to/store 6008 + 6009 + # Add a user with feeds (auto-discovers metadata from feed) 6010 + thicket add user "alyssa" \ 6011 + --feed "https://example.com/feed.atom" 6012 + # Auto-populates: email, homepage, icon, display_name from feed metadata 6013 + 6014 + # Add a user with manual overrides 6015 + thicket add user "alyssa" \ 6016 + --feed "https://example.com/feed.atom" \ 6017 + --email "alyssa@example.com" \ 6018 + --homepage "https://alyssa.example.com" \ 6019 + --icon "https://example.com/avatar.png" \ 6020 + --display-name "Alyssa P. 
Hacker" 6021 + 6022 + # Add additional feed to existing user 6023 + thicket add feed "alyssa" "https://example.com/other-feed.rss" 6024 + 6025 + # Sync all feeds (designed for cron usage) 6026 + thicket sync --all 6027 + 6028 + # Sync specific user 6029 + thicket sync --user alyssa 6030 + 6031 + # List users and their feeds 6032 + thicket list users 6033 + thicket list feeds --user alyssa 6034 + 6035 + # Manage duplicate entries 6036 + thicket duplicates list 6037 + thicket duplicates add <entry_id_1> <entry_id_2> # Mark as duplicates 6038 + thicket duplicates remove <entry_id_1> <entry_id_2> # Unmark duplicates 6039 + 6040 + # Link processing and threading 6041 + thicket links --verbose # Extract and categorize all links 6042 + thicket index --verbose # Build reference index for threading 6043 + thicket threads # Show conversation threads 6044 + thicket threads --username user1 # Show threads for specific user 6045 + thicket threads --min-size 3 # Show threads with minimum size 6046 + ``` 6047 + 6048 + ## Performance Considerations 6049 + 6050 + 1. **Concurrent Feed Fetching**: Use httpx with asyncio for parallel downloads 6051 + 2. **Incremental Updates**: Only fetch/parse feeds that have changed 6052 + 3. **Efficient Git Operations**: Batch commits, use shallow clones where appropriate 6053 + 4. **Progress Feedback**: Rich progress bars for long operations 6054 + 6055 + ## Security Considerations 6056 + 6057 + 1. **HTML Sanitization**: Use bleach to clean feed content 6058 + 2. **URL Validation**: Strict validation of feed URLs 6059 + 3. **Git Security**: No credentials stored in repository 6060 + 4. **Path Traversal**: Careful sanitization of filenames 6061 + 6062 + ## Future Enhancements 6063 + 6064 + 1. **Web Interface**: Optional web UI for browsing the git store 6065 + 2. **Webhooks**: Notify external services on feed updates 6066 + 3. **Feed Discovery**: Auto-discover feeds from HTML pages 6067 + 4. 
**Export Formats**: Generate static sites, OPML exports 6068 + 5. **Federation**: P2P sync between thicket instances 6069 + 6070 + ## Requirements Clarification 6071 + 6072 + **✓ Resolved Requirements:** 6073 + 1. **Feed Update Frequency**: Designed for cron usage - no built-in scheduling needed 6074 + 2. **Duplicate Handling**: Manual curation via `duplicates.json` file with CLI commands 6075 + 3. **Git Branching**: Single main branch for all users and entries 6076 + 4. **Authentication**: No feeds require authentication currently 6077 + 5. **Content Storage**: Store complete Atom entry body content as provided 6078 + 6. **Deleted Entries**: Preserve all entries in Git store permanently (historical archive) 6079 + 7. **History Depth**: Git store maintains full history beyond feed depth limits 6080 + 8. **Feed Auto-Discovery**: Extract user metadata from feed during `add user` command 6081 + 6082 + ## Duplicate Entry Management 6083 + 6084 + ### Duplicate Detection Strategy 6085 + - **Manual Curation**: Duplicates identified and managed manually via CLI 6086 + - **Storage**: `duplicates.json` file in Git root maps entry IDs to canonical entries 6087 + - **Structure**: `{"duplicate_id": "canonical_id", ...}` 6088 + - **CLI Commands**: Add/remove duplicate mappings with validation 6089 + - **Query Resolution**: Search/list commands resolve duplicates to canonical entries 6090 + 6091 + ### Duplicate File Format 6092 + ```json 6093 + { 6094 + "https://example.com/feed/entry/123": "https://canonical.com/posts/same-post", 6095 + "https://mirror.com/articles/456": "https://canonical.com/posts/same-post", 6096 + "comment": "Entry IDs that map to the same canonical content" 6097 + } 6098 + ``` 6099 + 6100 + ## Feed Metadata Auto-Discovery 6101 + 6102 + ### Extraction Strategy 6103 + When adding a new user with `thicket add user`, the system fetches and parses the feed to extract: 6104 + 6105 + - **Display Name**: From `feed.title` or `feed.author.name` 6106 + - **Email**: 
From `feed.author.email` or `feed.managingEditor` 6107 + - **Homepage**: From `feed.link` or `feed.author.uri` 6108 + - **Icon**: From `feed.logo`, `feed.icon`, or `feed.image.url` 6109 + 6110 + ### Discovery Priority Order 6111 + 1. **Author Information**: Prefer `feed.author.*` fields (more specific to person) 6112 + 2. **Feed-Level**: Fall back to feed-level metadata 6113 + 3. **Manual Override**: CLI flags always take precedence over discovered values 6114 + 4. **Update Behavior**: Auto-discovery only runs during initial `add user`, not on sync 6115 + 6116 + ### Extracted Metadata Format 6117 + ```python 6118 + class FeedMetadata(BaseModel): 6119 + title: Optional[str] = None 6120 + author_name: Optional[str] = None 6121 + author_email: Optional[EmailStr] = None 6122 + author_uri: Optional[HttpUrl] = None 6123 + link: Optional[HttpUrl] = None 6124 + logo: Optional[HttpUrl] = None 6125 + icon: Optional[HttpUrl] = None 6126 + image_url: Optional[HttpUrl] = None 6127 + 6128 + def to_user_config(self, username: str, feed_url: HttpUrl) -> UserConfig: 6129 + """Convert discovered metadata to UserConfig with fallbacks""" 6130 + return UserConfig( 6131 + username=username, 6132 + feeds=[feed_url], 6133 + display_name=self.author_name or self.title, 6134 + email=self.author_email, 6135 + homepage=self.author_uri or self.link, 6136 + icon=self.logo or self.icon or self.image_url 6137 + ) 6138 + ``` 6139 + 6140 + ## Link Processing and Threading Architecture 6141 + 6142 + ### Overview 6143 + The thicket system implements a sophisticated link processing and threading system to create email-style threaded views of blog entries by tracking cross-references between different blogs. 6144 + 6145 + ### Link Processing Pipeline 6146 + 6147 + #### 1. 
Link Extraction (`thicket links`) 6148 + The `links` command systematically extracts all outbound links from blog entries and categorizes them: 6149 + 6150 + ```python 6151 + class LinkData(BaseModel): 6152 + url: str # Fully resolved URL 6153 + entry_id: str # Source entry ID 6154 + username: str # Source username 6155 + context: str # Surrounding text context 6156 + category: str # "internal", "user", or "unknown" 6157 + target_username: Optional[str] # Target user if applicable 6158 + ``` 6159 + 6160 + **Link Categories:** 6161 + - **Internal**: Links to the same user's domain (self-references) 6162 + - **User**: Links to other tracked users' domains 6163 + - **Unknown**: Links to external sites not tracked by thicket 6164 + 6165 + #### 2. URL Resolution 6166 + All links are properly resolved using the Atom feed's base URL to handle: 6167 + - Relative URLs (converted to absolute) 6168 + - Protocol-relative URLs 6169 + - Fragment identifiers 6170 + - Redirects and canonical URLs 6171 + 6172 + #### 3. Domain Mapping 6173 + The system builds a comprehensive domain mapping from user configuration: 6174 + - Feed URLs → domain extraction 6175 + - Homepage URLs → domain extraction 6176 + - Reverse mapping: domain → username 6177 + 6178 + ### Threading System 6179 + 6180 + #### 1. Reference Index Generation (`thicket index`) 6181 + Creates a bidirectional reference index from the categorized links: 6182 + 6183 + ```python 6184 + class BlogReference(BaseModel): 6185 + source_entry_id: str 6186 + source_username: str 6187 + target_url: str 6188 + target_username: Optional[str] 6189 + target_entry_id: Optional[str] 6190 + context: str 6191 + ``` 6192 + 6193 + #### 2. 
Thread Detection Algorithm 6194 + Uses graph traversal to find connected blog entries: 6195 + - **Outbound references**: Links from an entry to other entries 6196 + - **Inbound references**: Links to an entry from other entries 6197 + - **Thread members**: All entries connected through references 6198 + 6199 + #### 3. Threading Display (`thicket threads`) 6200 + Creates email-style threaded views: 6201 + - Chronological ordering within threads 6202 + - Reference counts (outbound/inbound) 6203 + - Context preservation 6204 + - Filtering options (user, entry, minimum size) 6205 + 6206 + ### Data Structures 6207 + 6208 + #### links.json Format (Unified Structure) 6209 + ```json 6210 + { 6211 + "links": { 6212 + "https://example.com/post/123": { 6213 + "referencing_entries": ["https://blog.user.com/entry/456"], 6214 + "target_username": "user2" 6215 + }, 6216 + "https://external-site.com/article": { 6217 + "referencing_entries": ["https://blog.user.com/entry/789"] 6218 + } 6219 + }, 6220 + "reverse_mapping": { 6221 + "https://blog.user.com/entry/456": ["https://example.com/post/123"], 6222 + "https://blog.user.com/entry/789": ["https://external-site.com/article"] 6223 + }, 6224 + "references": [ 6225 + { 6226 + "source_entry_id": "https://blog.user.com/entry/456", 6227 + "source_username": "user1", 6228 + "target_url": "https://example.com/post/123", 6229 + "target_username": "user2", 6230 + "target_entry_id": "https://example.com/post/123", 6231 + "context": "As mentioned in this post..." 
6232 + } 6233 + ], 6234 + "user_domains": { 6235 + "user1": ["blog.user.com"], 6236 + "user2": ["example.com"] 6237 + } 6238 + } 6239 + ``` 6240 + 6241 + This unified structure eliminates duplication by: 6242 + - Storing each URL only once with minimal metadata 6243 + - Including all link data, reference data, and mappings in one file 6244 + - Using presence of `target_username` to identify tracked vs external links 6245 + - Providing bidirectional mappings for efficient queries 6246 + 6247 + ### Unified Structure Benefits 6248 + 6249 + - **Eliminates Duplication**: Each URL appears only once with metadata 6250 + - **Single Source of Truth**: All link-related data in one file 6251 + - **Efficient Queries**: Fast lookups for both directions (URL→entries, entry→URLs) 6252 + - **Atomic Updates**: All link data changes together 6253 + - **Reduced I/O**: Fewer file operations 6254 + 6255 + ### Implementation Benefits 6256 + 6257 + 1. **Systematic Link Processing**: All links are extracted and categorized consistently 6258 + 2. **Proper URL Resolution**: Handles relative URLs and base URL resolution correctly 6259 + 3. **Domain-based Categorization**: Automatically identifies user-to-user references 6260 + 4. **Bidirectional Indexing**: Supports both "who links to whom" and "who is linked by whom" 6261 + 5. **Thread Discovery**: Finds conversation threads automatically 6262 + 6. **Rich Context**: Preserves surrounding text for each link 6263 + 7. 
**Performance**: Pre-computed indexes for fast threading queries 6264 + 6265 + ### CLI Commands 6266 + 6267 + ```bash 6268 + # Extract and categorize all links 6269 + thicket links --verbose 6270 + 6271 + # Build reference index for threading 6272 + thicket index --verbose 6273 + 6274 + # Show all conversation threads 6275 + thicket threads 6276 + 6277 + # Show threads for specific user 6278 + thicket threads --username user1 6279 + 6280 + # Show threads with minimum size 6281 + thicket threads --min-size 3 6282 + ``` 6283 + 6284 + ### Integration with Existing Commands 6285 + 6286 + The link processing system integrates seamlessly with existing thicket commands: 6287 + - `thicket sync` updates entries, requiring `thicket links` to be run afterward 6288 + - `thicket index` uses the output from `thicket links` for improved accuracy 6289 + - `thicket threads` provides the user-facing threading interface 6290 + 6291 + ## Current Implementation Status 6292 + 6293 + ### ✅ Completed Features 6294 + 1. **Core Infrastructure** 6295 + - Modern CLI with Typer and Rich 6296 + - Pydantic data models for type safety 6297 + - Git repository operations with GitPython 6298 + - Feed parsing and normalization with feedparser 6299 + 6300 + 2. **User and Feed Management** 6301 + - `thicket init` - Initialize git store 6302 + - `thicket add` - Add users and feeds with auto-discovery 6303 + - `thicket sync` - Sync feeds with progress tracking 6304 + - `thicket list` - List users, feeds, and entries 6305 + - `thicket duplicates` - Manage duplicate entries 6306 + 6307 + 3. 
**Link Processing and Threading** 6308 + - `thicket links` - Extract and categorize all outbound links 6309 + - `thicket index` - Build reference index from links 6310 + - `thicket threads` - Display threaded conversation views 6311 + - Proper URL resolution with base URL handling 6312 + - Domain-based link categorization 6313 + - Context preservation for links 6314 + 6315 + ### 📊 System Performance 6316 + - **Link Extraction**: Successfully processes thousands of blog entries 6317 + - **Categorization**: Identifies internal, user, and unknown links 6318 + - **Threading**: Creates email-style threaded views of conversations 6319 + - **Storage**: Efficient JSON-based data structures for links and references 6320 + 6321 + ### 🔧 Current Architecture Highlights 6322 + - **Modular Design**: Clear separation between CLI, core logic, and models 6323 + - **Type Safety**: Comprehensive Pydantic models for data validation 6324 + - **Rich CLI**: Beautiful progress bars, tables, and error handling 6325 + - **Extensible**: Easy to add new commands and features 6326 + - **Git Integration**: All data stored in version-controlled JSON files 6327 + 6328 + ### 🎯 Proven Functionality 6329 + The system has been tested with real blog data and successfully: 6330 + - Extracted 14,396 total links from blog entries 6331 + - Categorized 3,994 internal links, 363 user-to-user links, and 10,039 unknown links 6332 + - Built comprehensive domain mappings for 16 users across 20 domains 6333 + - Generated threaded views showing blog conversation patterns 6334 + 6335 + ### 🚀 Ready for Use 6336 + The thicket system is now fully functional for: 6337 + - Maintaining Git repositories of blog feeds 6338 + - Tracking cross-references between blogs 6339 + - Creating threaded views of blog conversations 6340 + - Discovering blog interaction patterns 6341 + - Building distributed comment systems 6342 + </file> 6343 + 6344 + <file path="src/thicket/cli/utils.py"> 6345 + """CLI utilities and helpers.""" 6346 
+ 6347 + from pathlib import Path 6348 + from typing import Optional 6349 + 6350 + import typer 6351 + from rich.console import Console 6352 + from rich.progress import Progress, SpinnerColumn, TextColumn 6353 + from rich.table import Table 6354 + 6355 + from ..models import ThicketConfig, UserMetadata 6356 + from ..core.git_store import GitStore 6357 + 6358 + console = Console() 6359 + 6360 + 6361 + def get_tsv_mode() -> bool: 6362 + """Get the global TSV mode setting.""" 6363 + from .main import tsv_mode 6364 + return tsv_mode 6365 + 6366 + 6367 + def load_config(config_path: Optional[Path] = None) -> ThicketConfig: 6368 + """Load thicket configuration from file or environment.""" 6369 + if config_path and config_path.exists(): 6370 + import yaml 6371 + 6372 + with open(config_path) as f: 6373 + config_data = yaml.safe_load(f) 6374 + 6375 + # Convert to ThicketConfig 6376 + return ThicketConfig(**config_data) 6377 + 6378 + # Try to load from default locations or environment 6379 + try: 6380 + # First try to find thicket.yaml in current directory 6381 + default_config = Path("thicket.yaml") 6382 + if default_config.exists(): 6383 + import yaml 6384 + with open(default_config) as f: 6385 + config_data = yaml.safe_load(f) 6386 + return ThicketConfig(**config_data) 6387 + 6388 + # Fall back to environment variables 6389 + return ThicketConfig() 6390 + except Exception as e: 6391 + console.print(f"[red]Error loading configuration: {e}[/red]") 6392 + console.print("[yellow]Run 'thicket init' to create a new configuration.[/yellow]") 6393 + raise typer.Exit(1) from e 6394 + 6395 + 6396 + def save_config(config: ThicketConfig, config_path: Path) -> None: 6397 + """Save thicket configuration to file.""" 6398 + import yaml 6399 + 6400 + config_data = config.model_dump(mode="json", exclude_none=True) 6401 + 6402 + # Convert Path objects to strings for YAML serialization 6403 + config_data["git_store"] = str(config_data["git_store"]) 6404 + config_data["cache_dir"] = 
str(config_data["cache_dir"]) 6405 + 6406 + with open(config_path, "w") as f: 6407 + yaml.dump(config_data, f, default_flow_style=False, sort_keys=False) 6408 + 6409 + 6410 + def create_progress() -> Progress: 6411 + """Create a Rich progress display.""" 6412 + return Progress( 6413 + SpinnerColumn(), 6414 + TextColumn("[progress.description]{task.description}"), 6415 + console=console, 6416 + transient=True, 6417 + ) 6418 + 6419 + 6420 + def print_users_table(config: ThicketConfig) -> None: 6421 + """Print a table of users and their feeds.""" 6422 + if get_tsv_mode(): 6423 + print_users_tsv(config) 6424 + return 6425 + 6426 + table = Table(title="Users and Feeds") 6427 + table.add_column("Username", style="cyan", no_wrap=True) 6428 + table.add_column("Display Name", style="magenta") 6429 + table.add_column("Email", style="blue") 6430 + table.add_column("Homepage", style="green") 6431 + table.add_column("Feeds", style="yellow") 6432 + 6433 + for user in config.users: 6434 + feeds_str = "\n".join(str(feed) for feed in user.feeds) 6435 + table.add_row( 6436 + user.username, 6437 + user.display_name or "", 6438 + user.email or "", 6439 + str(user.homepage) if user.homepage else "", 6440 + feeds_str, 6441 + ) 6442 + 6443 + console.print(table) 6444 + 6445 + 6446 + def print_feeds_table(config: ThicketConfig, username: Optional[str] = None) -> None: 6447 + """Print a table of feeds, optionally filtered by username.""" 6448 + if get_tsv_mode(): 6449 + print_feeds_tsv(config, username) 6450 + return 6451 + 6452 + table = Table(title=f"Feeds{f' for {username}' if username else ''}") 6453 + table.add_column("Username", style="cyan", no_wrap=True) 6454 + table.add_column("Feed URL", style="blue") 6455 + table.add_column("Status", style="green") 6456 + 6457 + users = [config.find_user(username)] if username else config.users 6458 + users = [u for u in users if u is not None] 6459 + 6460 + for user in users: 6461 + for feed in user.feeds: 6462 + table.add_row( 6463 + 
user.username, 6464 + str(feed), 6465 + "Active", # TODO: Add actual status checking 6466 + ) 6467 + 6468 + console.print(table) 6469 + 6470 + 6471 + def confirm_action(message: str, default: bool = False) -> bool: 6472 + """Prompt for confirmation.""" 6473 + return typer.confirm(message, default=default) 6474 + 6475 + 6476 + def print_success(message: str) -> None: 6477 + """Print a success message.""" 6478 + console.print(f"[green]✓[/green] {message}") 6479 + 6480 + 6481 + def print_error(message: str) -> None: 6482 + """Print an error message.""" 6483 + console.print(f"[red]✗[/red] {message}") 6484 + 6485 + 6486 + def print_warning(message: str) -> None: 6487 + """Print a warning message.""" 6488 + console.print(f"[yellow]⚠[/yellow] {message}") 6489 + 6490 + 6491 + def print_info(message: str) -> None: 6492 + """Print an info message.""" 6493 + console.print(f"[blue]ℹ[/blue] {message}") 6494 + 6495 + 6496 + def print_users_table_from_git(users: list[UserMetadata]) -> None: 6497 + """Print a table of users from git repository.""" 6498 + if get_tsv_mode(): 6499 + print_users_tsv_from_git(users) 6500 + return 6501 + 6502 + table = Table(title="Users and Feeds") 6503 + table.add_column("Username", style="cyan", no_wrap=True) 6504 + table.add_column("Display Name", style="magenta") 6505 + table.add_column("Email", style="blue") 6506 + table.add_column("Homepage", style="green") 6507 + table.add_column("Feeds", style="yellow") 6508 + 6509 + for user in users: 6510 + feeds_str = "\n".join(user.feeds) 6511 + table.add_row( 6512 + user.username, 6513 + user.display_name or "", 6514 + user.email or "", 6515 + user.homepage or "", 6516 + feeds_str, 6517 + ) 6518 + 6519 + console.print(table) 6520 + 6521 + 6522 + def print_feeds_table_from_git(git_store: GitStore, username: Optional[str] = None) -> None: 6523 + """Print a table of feeds from git repository.""" 6524 + if get_tsv_mode(): 6525 + print_feeds_tsv_from_git(git_store, username) 6526 + return 6527 + 6528 + table = 
Table(title=f"Feeds{f' for {username}' if username else ''}") 6529 + table.add_column("Username", style="cyan", no_wrap=True) 6530 + table.add_column("Feed URL", style="blue") 6531 + table.add_column("Status", style="green") 6532 + 6533 + if username: 6534 + user = git_store.get_user(username) 6535 + users = [user] if user else [] 6536 + else: 6537 + index = git_store._load_index() 6538 + users = list(index.users.values()) 6539 + 6540 + for user in users: 6541 + for feed in user.feeds: 6542 + table.add_row( 6543 + user.username, 6544 + feed, 6545 + "Active", # TODO: Add actual status checking 6546 + ) 6547 + 6548 + console.print(table) 6549 + 6550 + 6551 + def print_users_tsv(config: ThicketConfig) -> None: 6552 + """Print users in TSV format.""" 6553 + print("Username\tDisplay Name\tEmail\tHomepage\tFeeds") 6554 + for user in config.users: 6555 + feeds_str = ",".join(str(feed) for feed in user.feeds) 6556 + print(f"{user.username}\t{user.display_name or ''}\t{user.email or ''}\t{user.homepage or ''}\t{feeds_str}") 6557 + 6558 + 6559 + def print_users_tsv_from_git(users: list[UserMetadata]) -> None: 6560 + """Print users from git repository in TSV format.""" 6561 + print("Username\tDisplay Name\tEmail\tHomepage\tFeeds") 6562 + for user in users: 6563 + feeds_str = ",".join(user.feeds) 6564 + print(f"{user.username}\t{user.display_name or ''}\t{user.email or ''}\t{user.homepage or ''}\t{feeds_str}") 6565 + 6566 + 6567 + def print_feeds_tsv(config: ThicketConfig, username: Optional[str] = None) -> None: 6568 + """Print feeds in TSV format.""" 6569 + print("Username\tFeed URL\tStatus") 6570 + users = [config.find_user(username)] if username else config.users 6571 + users = [u for u in users if u is not None] 6572 + 6573 + for user in users: 6574 + for feed in user.feeds: 6575 + print(f"{user.username}\t{feed}\tActive") 6576 + 6577 + 6578 + def print_feeds_tsv_from_git(git_store: GitStore, username: Optional[str] = None) -> None: 6579 + """Print feeds from git 
repository in TSV format.""" 6580 + print("Username\tFeed URL\tStatus") 6581 + 6582 + if username: 6583 + user = git_store.get_user(username) 6584 + users = [user] if user else [] 6585 + else: 6586 + index = git_store._load_index() 6587 + users = list(index.users.values()) 6588 + 6589 + for user in users: 6590 + for feed in user.feeds: 6591 + print(f"{user.username}\t{feed}\tActive") 6592 + 6593 + 6594 + def print_entries_tsv(entries_by_user: list[list], usernames: list[str]) -> None: 6595 + """Print entries in TSV format.""" 6596 + print("User\tAtom ID\tTitle\tUpdated\tURL") 6597 + 6598 + # Combine all entries with usernames 6599 + all_entries = [] 6600 + for entries, username in zip(entries_by_user, usernames): 6601 + for entry in entries: 6602 + all_entries.append((username, entry)) 6603 + 6604 + # Sort by updated time (newest first) 6605 + all_entries.sort(key=lambda x: x[1].updated, reverse=True) 6606 + 6607 + for username, entry in all_entries: 6608 + # Format updated time 6609 + updated_str = entry.updated.strftime("%Y-%m-%d %H:%M") 6610 + 6611 + # Escape tabs and newlines in title to preserve TSV format 6612 + title = entry.title.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ') 6613 + 6614 + print(f"{username}\t{entry.id}\t{title}\t{updated_str}\t{entry.link}")
+5 -1
src/thicket/__init__.py
··· 1 - """Thicket: A CLI tool for persisting Atom/RSS feeds in Git repositories.""" 1 + """Thicket - A library for managing feed repositories and static site generation.""" 2 2 3 + from .thicket import Thicket 4 + from .models import AtomEntry, UserConfig, ThicketConfig 5 + 6 + __all__ = ["Thicket", "AtomEntry", "UserConfig", "ThicketConfig"] 3 7 __version__ = "0.1.0" 4 8 __author__ = "thicket" 5 9 __email__ = "thicket@example.com"
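The re-exported `Thicket` class is the commit's single entry point: CLI commands now go through it instead of reaching into core modules directly. A minimal sketch of that facade shape — all names below (`UserSubsystem`, `ThicketFacade`) are illustrative stand-ins, not the real library API:

```python
from dataclasses import dataclass, field
from pathlib import Path


@dataclass
class UserSubsystem:
    """Hypothetical specialized subsystem owning user state."""
    users: dict = field(default_factory=dict)

    def add(self, username: str, feeds: list[str], **metadata) -> dict:
        record = {"username": username, "feeds": list(feeds), **metadata}
        self.users[username] = record
        return record


@dataclass
class ThicketFacade:
    """Hypothetical unified entry point delegating to subsystems,
    mirroring the architecture this commit describes."""
    repo_path: Path
    users: UserSubsystem = field(default_factory=UserSubsystem)

    def add_user(self, username: str, feeds: list[str], **metadata) -> dict:
        # The facade forwards to the subsystem; callers never touch it directly.
        return self.users.add(username, feeds, **metadata)


thicket = ThicketFacade(Path("/tmp/repo"))
user = thicket.add_user("alice", ["https://example.com/feed.xml"], display_name="Alice")
print(user["display_name"])  # Alice
```

The payoff claimed in the commit message (~40% less duplication) comes from each CLI command calling one facade method instead of re-implementing config loading, git access, and JSON handling.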
+44 -159
src/thicket/cli/commands/add.py
··· 1 1 """Add command for thicket.""" 2 2 3 - import asyncio 4 3 from pathlib import Path 5 4 from typing import Optional 6 5 7 6 import typer 8 - from pydantic import HttpUrl, ValidationError 7 + from pydantic import ValidationError 9 8 10 - from ...core.feed_parser import FeedParser 11 - from ...core.git_store import GitStore 12 - from ..main import app 13 - from ..utils import ( 14 - create_progress, 15 - load_config, 16 - print_error, 17 - print_info, 18 - print_success, 19 - ) 9 + from ..main import app, console, load_thicket 20 10 21 11 22 12 @app.command("add") 23 - def add_command( 24 - subcommand: str = typer.Argument(..., help="Subcommand: 'user' or 'feed'"), 13 + def add_user( 25 14 username: str = typer.Argument(..., help="Username"), 26 - feed_url: Optional[str] = typer.Argument(None, help="Feed URL (required for 'user' command)"), 15 + feeds: list[str] = typer.Argument(..., help="Feed URLs"), 27 16 email: Optional[str] = typer.Option(None, "--email", "-e", help="User email"), 28 17 homepage: Optional[str] = typer.Option(None, "--homepage", "-h", help="User homepage"), 29 18 icon: Optional[str] = typer.Option(None, "--icon", "-i", help="User icon URL"), 30 19 display_name: Optional[str] = typer.Option(None, "--display-name", "-d", help="User display name"), 31 20 config_file: Optional[Path] = typer.Option( 32 - Path("thicket.yaml"), "--config", help="Configuration file path" 33 - ), 34 - auto_discover: bool = typer.Option( 35 - True, "--auto-discover/--no-auto-discover", help="Auto-discover user metadata from feed" 21 + None, "--config", help="Configuration file path" 36 22 ), 37 23 ) -> None: 38 - """Add a user or feed to thicket.""" 39 - 40 - if subcommand == "user": 41 - add_user(username, feed_url, email, homepage, icon, display_name, config_file, auto_discover) 42 - elif subcommand == "feed": 43 - add_feed(username, feed_url, config_file) 44 - else: 45 - print_error(f"Unknown subcommand: {subcommand}") 46 - print_error("Use 'user' or 'feed'") 47 
- raise typer.Exit(1) 48 - 49 - 50 - def add_user( 51 - username: str, 52 - feed_url: Optional[str], 53 - email: Optional[str], 54 - homepage: Optional[str], 55 - icon: Optional[str], 56 - display_name: Optional[str], 57 - config_file: Path, 58 - auto_discover: bool, 59 - ) -> None: 60 - """Add a new user with feed.""" 61 - 62 - if not feed_url: 63 - print_error("Feed URL is required when adding a user") 64 - raise typer.Exit(1) 65 - 66 - # Validate feed URL 24 + """Add a user with their feeds to thicket.""" 25 + 67 26 try: 68 - validated_feed_url = HttpUrl(feed_url) 69 - except ValidationError: 70 - print_error(f"Invalid feed URL: {feed_url}") 71 - raise typer.Exit(1) from None 72 - 73 - # Load configuration 74 - config = load_config(config_file) 75 - 76 - # Initialize Git store 77 - git_store = GitStore(config.git_store) 78 - 79 - # Check if user already exists 80 - existing_user = git_store.get_user(username) 81 - if existing_user: 82 - print_error(f"User '{username}' already exists") 83 - print_error("Use 'thicket add feed' to add additional feeds") 27 + # Load Thicket instance 28 + thicket = load_thicket(config_file) 29 + 30 + # Prepare user data 31 + user_data = {} 32 + if email: 33 + user_data['email'] = email 34 + if homepage: 35 + user_data['homepage'] = homepage 36 + if icon: 37 + user_data['icon'] = icon 38 + if display_name: 39 + user_data['display_name'] = display_name 40 + 41 + # Add the user 42 + user_config = thicket.add_user(username, feeds, **user_data) 43 + 44 + console.print(f"[green]✓[/green] Added user: {username}") 45 + console.print(f" • Display name: {user_config.display_name or 'None'}") 46 + console.print(f" • Email: {user_config.email or 'None'}") 47 + console.print(f" • Homepage: {user_config.homepage or 'None'}") 48 + console.print(f" • Feeds: {len(user_config.feeds)}") 49 + 50 + for feed in user_config.feeds: 51 + console.print(f" - {feed}") 52 + 53 + # Commit the addition 54 + commit_message = f"Add user {username} with {len(feeds)} 
feed(s)" 55 + if thicket.commit_changes(commit_message): 56 + console.print(f"[green]✓[/green] Committed: {commit_message}") 57 + else: 58 + console.print("[yellow]Warning:[/yellow] Failed to commit changes") 59 + 60 + except ValidationError as e: 61 + console.print(f"[red]Validation Error:[/red] {str(e)}") 84 62 raise typer.Exit(1) 85 - 86 - # Auto-discover metadata if enabled 87 - discovered_metadata = None 88 - if auto_discover: 89 - discovered_metadata = asyncio.run(discover_feed_metadata(validated_feed_url)) 90 - 91 - # Prepare user data with manual overrides taking precedence 92 - user_display_name = display_name or (discovered_metadata.author_name or discovered_metadata.title if discovered_metadata else None) 93 - user_email = email or (discovered_metadata.author_email if discovered_metadata else None) 94 - user_homepage = homepage or (str(discovered_metadata.author_uri or discovered_metadata.link) if discovered_metadata else None) 95 - user_icon = icon or (str(discovered_metadata.logo or discovered_metadata.icon or discovered_metadata.image_url) if discovered_metadata else None) 96 - 97 - # Add user to Git store 98 - git_store.add_user( 99 - username=username, 100 - display_name=user_display_name, 101 - email=user_email, 102 - homepage=user_homepage, 103 - icon=user_icon, 104 - feeds=[str(validated_feed_url)], 105 - ) 106 - 107 - # Commit changes 108 - git_store.commit_changes(f"Add user: {username}") 109 - 110 - print_success(f"Added user '{username}' with feed: {feed_url}") 111 - 112 - if discovered_metadata and auto_discover: 113 - print_info("Auto-discovered metadata:") 114 - if user_display_name: 115 - print_info(f" Display name: {user_display_name}") 116 - if user_email: 117 - print_info(f" Email: {user_email}") 118 - if user_homepage: 119 - print_info(f" Homepage: {user_homepage}") 120 - if user_icon: 121 - print_info(f" Icon: {user_icon}") 122 - 123 - 124 - def add_feed(username: str, feed_url: Optional[str], config_file: Path) -> None: 125 - """Add 
a feed to an existing user.""" 126 - 127 - if not feed_url: 128 - print_error("Feed URL is required") 63 + except Exception as e: 64 + console.print(f"[red]Error:[/red] {str(e)}") 129 65 raise typer.Exit(1) 130 66 131 - # Validate feed URL 132 - try: 133 - validated_feed_url = HttpUrl(feed_url) 134 - except ValidationError: 135 - print_error(f"Invalid feed URL: {feed_url}") 136 - raise typer.Exit(1) from None 137 - 138 - # Load configuration 139 - config = load_config(config_file) 140 - 141 - # Initialize Git store 142 - git_store = GitStore(config.git_store) 143 - 144 - # Check if user exists 145 - user = git_store.get_user(username) 146 - if not user: 147 - print_error(f"User '{username}' not found") 148 - print_error("Use 'thicket add user' to add a new user") 149 - raise typer.Exit(1) 150 - 151 - # Check if feed already exists 152 - if str(validated_feed_url) in user.feeds: 153 - print_error(f"Feed already exists for user '{username}': {feed_url}") 154 - raise typer.Exit(1) 155 - 156 - # Add feed to user 157 - updated_feeds = user.feeds + [str(validated_feed_url)] 158 - if git_store.update_user(username, feeds=updated_feeds): 159 - git_store.commit_changes(f"Add feed to user {username}: {feed_url}") 160 - print_success(f"Added feed to user '{username}': {feed_url}") 161 - else: 162 - print_error(f"Failed to add feed to user '{username}'") 163 - raise typer.Exit(1) 164 - 165 - 166 - async def discover_feed_metadata(feed_url: HttpUrl): 167 - """Discover metadata from a feed URL.""" 168 - try: 169 - with create_progress() as progress: 170 - task = progress.add_task("Discovering feed metadata...", total=None) 171 - 172 - parser = FeedParser() 173 - content = await parser.fetch_feed(feed_url) 174 - metadata, _ = parser.parse_feed(content, feed_url) 175 - 176 - progress.update(task, completed=True) 177 - return metadata 178 - 179 - except Exception as e: 180 - print_error(f"Failed to discover feed metadata: {e}") 181 - return None
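The deleted command validated each URL with pydantic's `HttpUrl` before touching the `GitStore`; the rewritten command defers that to the library. A stdlib-only sketch of an equivalent pre-check (the helper name is ours, not part of thicket):

```python
from urllib.parse import urlparse


def is_valid_feed_url(url: str) -> bool:
    """Rough stand-in for the pydantic HttpUrl check the old command ran:
    accept only absolute http(s) URLs that carry a host."""
    parsed = urlparse(url)
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)


print(is_valid_feed_url("https://example.com/feed.xml"))  # True
print(is_valid_feed_url("not-a-url"))                     # False
```

Whichever layer owns the check, validating before the first git write keeps a bad URL from leaving a half-committed user behind.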
+35 -679
src/thicket/cli/commands/generate.py
··· 1 1 """Generate static HTML website from thicket data.""" 2 2 3 - import base64 4 - import json 5 - import re 6 - import shutil 7 - from datetime import datetime 8 3 from pathlib import Path 9 - from typing import Any, Optional, TypedDict, Union 4 + from typing import Optional 10 5 11 6 import typer 12 - from jinja2 import Environment, FileSystemLoader, select_autoescape 13 - from rich.progress import Progress, SpinnerColumn, TextColumn 14 - 15 - from ...core.git_store import GitStore 16 - from ...models.feed import AtomEntry 17 - from ...models.user import GitStoreIndex, UserMetadata 18 - from ..main import app 19 - from ..utils import console, load_config 20 - 21 - 22 - class UserData(TypedDict): 23 - """Type definition for user data structure.""" 24 - 25 - metadata: UserMetadata 26 - recent_entries: list[tuple[str, AtomEntry]] 27 - 28 - 29 - def safe_anchor_id(atom_id: str) -> str: 30 - """Convert an Atom ID to a safe HTML anchor ID.""" 31 - # Use base64 URL-safe encoding without padding 32 - encoded = base64.urlsafe_b64encode(atom_id.encode('utf-8')).decode('ascii').rstrip('=') 33 - # Prefix with 'id' to ensure it starts with a letter (HTML requirement) 34 - return f"id{encoded}" 35 - 36 - 37 - class WebsiteGenerator: 38 - """Generate static HTML website from thicket data.""" 39 - 40 - def __init__(self, git_store: GitStore, output_dir: Path): 41 - self.git_store = git_store 42 - self.output_dir = output_dir 43 - self.template_dir = Path(__file__).parent.parent.parent / "templates" 44 - 45 - # Initialize Jinja2 environment 46 - self.env = Environment( 47 - loader=FileSystemLoader(self.template_dir), 48 - autoescape=select_autoescape(["html", "xml"]), 49 - ) 50 7 51 - # Data containers 52 - self.index: Optional[GitStoreIndex] = None 53 - self.entries: list[tuple[str, AtomEntry]] = [] # (username, entry) 54 - self.links_data: Optional[dict[str, Any]] = None 55 - self.threads: list[list[dict[str, Any]]] = [] # List of threads with metadata 56 - 57 - def 
get_display_name(self, username: str) -> str: 58 - """Get display name for a user, falling back to username.""" 59 - if self.index and username in self.index.users: 60 - user = self.index.users[username] 61 - return user.display_name or username 62 - return username 63 - 64 - def get_user_homepage(self, username: str) -> Optional[str]: 65 - """Get homepage URL for a user.""" 66 - if self.index and username in self.index.users: 67 - user = self.index.users[username] 68 - return str(user.homepage) if user.homepage else None 69 - return None 70 - 71 - def clean_html_summary(self, content: Optional[str], max_length: int = 200) -> str: 72 - """Clean HTML content and truncate for display in timeline.""" 73 - if not content: 74 - return "" 75 - 76 - # Remove HTML tags 77 - clean_text = re.sub(r"<[^>]+>", " ", content) 78 - # Replace multiple whitespace with single space 79 - clean_text = re.sub(r"\s+", " ", clean_text) 80 - # Strip leading/trailing whitespace 81 - clean_text = clean_text.strip() 82 - 83 - # Truncate with ellipsis if needed 84 - if len(clean_text) > max_length: 85 - # Try to break at word boundary 86 - truncated = clean_text[:max_length] 87 - last_space = truncated.rfind(" ") 88 - if ( 89 - last_space > max_length * 0.8 90 - ): # If we can break reasonably close to the limit 91 - clean_text = truncated[:last_space] + "..." 92 - else: 93 - clean_text = truncated + "..." 
94 - 95 - return clean_text 96 - 97 - def load_data(self) -> None: 98 - """Load all data from the git repository.""" 99 - with Progress( 100 - SpinnerColumn(), 101 - TextColumn("[progress.description]{task.description}"), 102 - console=console, 103 - ) as progress: 104 - # Load index 105 - task = progress.add_task("Loading repository index...", total=None) 106 - self.index = self.git_store._load_index() 107 - if not self.index: 108 - raise ValueError("No index found in repository") 109 - progress.update(task, completed=True) 110 - 111 - # Load all entries 112 - task = progress.add_task("Loading entries...", total=None) 113 - for username, user_metadata in self.index.users.items(): 114 - user_dir = self.git_store.repo_path / user_metadata.directory 115 - if user_dir.exists(): 116 - for entry_file in user_dir.glob("*.json"): 117 - if entry_file.name not in ["index.json", "duplicates.json"]: 118 - try: 119 - with open(entry_file) as f: 120 - entry_data = json.load(f) 121 - entry = AtomEntry(**entry_data) 122 - self.entries.append((username, entry)) 123 - except Exception as e: 124 - console.print( 125 - f"[yellow]Warning: Failed to load {entry_file}: {e}[/yellow]" 126 - ) 127 - progress.update(task, completed=True) 128 - 129 - # Sort entries by date (newest first) - prioritize updated over published 130 - self.entries.sort( 131 - key=lambda x: x[1].updated or x[1].published or datetime.min, reverse=True 132 - ) 133 - 134 - # Load links data 135 - task = progress.add_task("Loading links and references...", total=None) 136 - links_file = self.git_store.repo_path / "links.json" 137 - if links_file.exists(): 138 - with open(links_file) as f: 139 - self.links_data = json.load(f) 140 - progress.update(task, completed=True) 141 - 142 - def build_threads(self) -> None: 143 - """Build threaded conversations from references.""" 144 - if not self.links_data or "references" not in self.links_data: 145 - return 146 - 147 - # Map entry IDs to (username, entry) tuples 148 - 
entry_map: dict[str, tuple[str, AtomEntry]] = {} 149 - for username, entry in self.entries: 150 - entry_map[entry.id] = (username, entry) 151 - 152 - # Build adjacency lists for references 153 - self.outbound_refs: dict[str, set[str]] = {} 154 - self.inbound_refs: dict[str, set[str]] = {} 155 - self.reference_details: dict[ 156 - str, list[dict[str, Any]] 157 - ] = {} # Store full reference info 158 - 159 - for ref in self.links_data["references"]: 160 - source_id = ref["source_entry_id"] 161 - target_id = ref.get("target_entry_id") 162 - 163 - if target_id and source_id in entry_map and target_id in entry_map: 164 - self.outbound_refs.setdefault(source_id, set()).add(target_id) 165 - self.inbound_refs.setdefault(target_id, set()).add(source_id) 166 - 167 - # Store reference details for UI 168 - self.reference_details.setdefault(source_id, []).append( 169 - { 170 - "target_id": target_id, 171 - "target_username": ref.get("target_username"), 172 - "type": "outbound", 173 - } 174 - ) 175 - self.reference_details.setdefault(target_id, []).append( 176 - { 177 - "source_id": source_id, 178 - "source_username": ref.get("source_username"), 179 - "type": "inbound", 180 - } 181 - ) 182 - 183 - # Find conversation threads (multi-post discussions) 184 - processed = set() 185 - 186 - for entry_id, (_username, _entry) in entry_map.items(): 187 - if entry_id in processed: 188 - continue 189 - 190 - # Build thread starting from this entry 191 - thread = [] 192 - to_visit = [entry_id] 193 - thread_ids = set() 194 - level_map: dict[str, int] = {} # Track levels for this thread 195 - 196 - # First, traverse up to find the root 197 - current = entry_id 198 - while current in self.inbound_refs: 199 - parents = self.inbound_refs[current] - { 200 - current 201 - } # Exclude self-references 202 - if not parents: 203 - break 204 - # Take the first parent 205 - parent = next(iter(parents)) 206 - if parent in thread_ids: # Avoid cycles 207 - break 208 - current = parent 209 - 
to_visit.insert(0, current) 210 - 211 - # Now traverse down from the root 212 - while to_visit: 213 - current = to_visit.pop(0) 214 - if current in thread_ids or current not in entry_map: 215 - continue 216 - 217 - thread_ids.add(current) 218 - username, entry = entry_map[current] 219 - 220 - # Calculate thread level 221 - thread_level = self._calculate_thread_level(current, level_map) 222 - 223 - # Add threading metadata 224 - thread_entry = { 225 - "username": username, 226 - "display_name": self.get_display_name(username), 227 - "entry": entry, 228 - "entry_id": current, 229 - "references_to": list(self.outbound_refs.get(current, [])), 230 - "referenced_by": list(self.inbound_refs.get(current, [])), 231 - "thread_level": thread_level, 232 - } 233 - thread.append(thread_entry) 234 - processed.add(current) 235 - 236 - # Add children 237 - if current in self.outbound_refs: 238 - children = self.outbound_refs[current] - thread_ids # Avoid cycles 239 - to_visit.extend(sorted(children)) 240 - 241 - if len(thread) > 1: # Only keep actual threads 242 - # Sort thread by date (newest first) - prioritize updated over published 243 - thread.sort(key=lambda x: x["entry"].updated or x["entry"].published or datetime.min, reverse=True) # type: ignore 244 - self.threads.append(thread) 245 - 246 - # Sort threads by the date of their most recent entry - prioritize updated over published 247 - self.threads.sort( 248 - key=lambda t: max( 249 - item["entry"].updated or item["entry"].published or datetime.min for item in t 250 - ), 251 - reverse=True, 252 - ) 253 - 254 - def _calculate_thread_level( 255 - self, entry_id: str, processed_entries: dict[str, int] 256 - ) -> int: 257 - """Calculate indentation level for threaded display.""" 258 - if entry_id in processed_entries: 259 - return processed_entries[entry_id] 260 - 261 - if entry_id not in self.inbound_refs: 262 - processed_entries[entry_id] = 0 263 - return 0 264 - 265 - parents_in_thread = self.inbound_refs[entry_id] & 
set(processed_entries.keys()) 266 - if not parents_in_thread: 267 - processed_entries[entry_id] = 0 268 - return 0 269 - 270 - # Find the deepest parent level + 1 271 - max_parent_level = 0 272 - for parent_id in parents_in_thread: 273 - parent_level = self._calculate_thread_level(parent_id, processed_entries) 274 - max_parent_level = max(max_parent_level, parent_level) 275 - 276 - level = min(max_parent_level + 1, 4) # Cap at level 4 277 - processed_entries[entry_id] = level 278 - return level 279 - 280 - def get_standalone_references(self) -> list[dict[str, Any]]: 281 - """Get posts that have references but aren't part of multi-post threads.""" 282 - if not hasattr(self, "reference_details"): 283 - return [] 284 - 285 - threaded_entry_ids = set() 286 - for thread in self.threads: 287 - for item in thread: 288 - threaded_entry_ids.add(item["entry_id"]) 289 - 290 - standalone_refs = [] 291 - for username, entry in self.entries: 292 - if ( 293 - entry.id in self.reference_details 294 - and entry.id not in threaded_entry_ids 295 - ): 296 - refs = self.reference_details[entry.id] 297 - # Only include if it has meaningful references (not just self-references) 298 - meaningful_refs = [ 299 - r 300 - for r in refs 301 - if r.get("target_id") != entry.id and r.get("source_id") != entry.id 302 - ] 303 - if meaningful_refs: 304 - standalone_refs.append( 305 - { 306 - "username": username, 307 - "display_name": self.get_display_name(username), 308 - "entry": entry, 309 - "references": meaningful_refs, 310 - } 311 - ) 312 - 313 - return standalone_refs 314 - 315 - def _add_cross_thread_links(self, timeline_items: list[dict[str, Any]]) -> None: 316 - """Add cross-thread linking for entries that appear in multiple threads.""" 317 - # Map entry IDs to their positions in the timeline 318 - entry_positions: dict[str, list[int]] = {} 319 - # Map URLs referenced by entries to the entries that reference them 320 - url_references: dict[str, list[tuple[str, int]]] = {} # url -> 
[(entry_id, position)] 321 - 322 - # First pass: collect all entry IDs, their positions, and referenced URLs 323 - for i, item in enumerate(timeline_items): 324 - if item["type"] == "post": 325 - entry_id = item["content"]["entry"].id 326 - entry_positions.setdefault(entry_id, []).append(i) 327 - # Track URLs this entry references 328 - if entry_id in self.reference_details: 329 - for ref in self.reference_details[entry_id]: 330 - if ref["type"] == "outbound" and "target_id" in ref: 331 - # Find the target entry's URL if available 332 - target_entry = self._find_entry_by_id(ref["target_id"]) 333 - if target_entry and target_entry.link: 334 - url = str(target_entry.link) 335 - url_references.setdefault(url, []).append((entry_id, i)) 336 - elif item["type"] == "thread": 337 - for thread_item in item["content"]: 338 - entry_id = thread_item["entry"].id 339 - entry_positions.setdefault(entry_id, []).append(i) 340 - # Track URLs this entry references 341 - if entry_id in self.reference_details: 342 - for ref in self.reference_details[entry_id]: 343 - if ref["type"] == "outbound" and "target_id" in ref: 344 - target_entry = self._find_entry_by_id(ref["target_id"]) 345 - if target_entry and target_entry.link: 346 - url = str(target_entry.link) 347 - url_references.setdefault(url, []).append((entry_id, i)) 348 - 349 - # Build cross-thread connections - only for entries that actually appear multiple times 350 - cross_thread_connections: dict[str, set[int]] = {} # entry_id -> set of timeline positions 351 - 352 - # Add connections ONLY for entries that appear multiple times in the timeline 353 - for entry_id, positions in entry_positions.items(): 354 - if len(positions) > 1: 355 - cross_thread_connections[entry_id] = set(positions) 356 - # Debug: uncomment to see which entries have multiple appearances 357 - # print(f"Entry {entry_id[:50]}... 
appears at positions: {positions}") 358 - 359 - # Apply cross-thread links to timeline items 360 - for entry_id, positions_set in cross_thread_connections.items(): 361 - positions_list = list(positions_set) 362 - for pos in positions_list: 363 - item = timeline_items[pos] 364 - other_positions = sorted([p for p in positions_list if p != pos]) 365 - 366 - if item["type"] == "post": 367 - # Add cross-thread info to individual posts 368 - item["content"]["cross_thread_links"] = self._build_cross_thread_link_data(entry_id, other_positions, timeline_items) 369 - # Add info about shared references 370 - item["content"]["shared_references"] = self._get_shared_references(entry_id, positions_set, timeline_items) 371 - elif item["type"] == "thread": 372 - # Add cross-thread info to thread items 373 - for thread_item in item["content"]: 374 - if thread_item["entry"].id == entry_id: 375 - thread_item["cross_thread_links"] = self._build_cross_thread_link_data(entry_id, other_positions, timeline_items) 376 - thread_item["shared_references"] = self._get_shared_references(entry_id, positions_set, timeline_items) 377 - break 378 - 379 - def _build_cross_thread_link_data(self, entry_id: str, other_positions: list[int], timeline_items: list[dict[str, Any]]) -> list[dict[str, Any]]: 380 - """Build detailed cross-thread link data with anchor information.""" 381 - cross_thread_links = [] 382 - 383 - for pos in other_positions: 384 - item = timeline_items[pos] 385 - if item["type"] == "post": 386 - # For individual posts 387 - safe_id = safe_anchor_id(entry_id) 388 - cross_thread_links.append({ 389 - "position": pos, 390 - "anchor_id": f"post-{pos}-{safe_id}", 391 - "context": "individual post", 392 - "title": item["content"]["entry"].title 393 - }) 394 - elif item["type"] == "thread": 395 - # For thread items, find the specific thread item 396 - for thread_idx, thread_item in enumerate(item["content"]): 397 - if thread_item["entry"].id == entry_id: 398 - safe_id = 
safe_anchor_id(entry_id) 399 - cross_thread_links.append({ 400 - "position": pos, 401 - "anchor_id": f"post-{pos}-{thread_idx}-{safe_id}", 402 - "context": f"thread (level {thread_item.get('thread_level', 0)})", 403 - "title": thread_item["entry"].title 404 - }) 405 - break 406 - 407 - return cross_thread_links 8 + from ..main import app, console, load_thicket 408 9 409 - def _find_entry_by_id(self, entry_id: str) -> Optional[AtomEntry]: 410 - """Find an entry by its ID.""" 411 - for _username, entry in self.entries: 412 - if entry.id == entry_id: 413 - return entry 414 - return None 415 10 416 - def _get_shared_references(self, entry_id: str, positions: Union[set[int], list[int]], timeline_items: list[dict[str, Any]]) -> list[dict[str, Any]]: 417 - """Get information about shared references between cross-thread entries.""" 418 - shared_refs = [] 419 - 420 - # Collect all referenced URLs from entries at these positions 421 - url_counts: dict[str, int] = {} 422 - referencing_entries: dict[str, list[str]] = {} # url -> [entry_ids] 423 - 424 - for pos in positions: 425 - item = timeline_items[pos] 426 - entries_to_check = [] 427 - 428 - if item["type"] == "post": 429 - entries_to_check.append(item["content"]["entry"]) 430 - elif item["type"] == "thread": 431 - entries_to_check.extend([ti["entry"] for ti in item["content"]]) 432 - 433 - for entry in entries_to_check: 434 - if entry.id in self.reference_details: 435 - for ref in self.reference_details[entry.id]: 436 - if ref["type"] == "outbound" and "target_id" in ref: 437 - target_entry = self._find_entry_by_id(ref["target_id"]) 438 - if target_entry and target_entry.link: 439 - url = str(target_entry.link) 440 - url_counts[url] = url_counts.get(url, 0) + 1 441 - if url not in referencing_entries: 442 - referencing_entries[url] = [] 443 - if entry.id not in referencing_entries[url]: 444 - referencing_entries[url].append(entry.id) 445 - 446 - # Find URLs referenced by multiple entries 447 - for url, count in 
url_counts.items(): 448 - if count > 1 and len(referencing_entries[url]) > 1: 449 - # Get the target entry info 450 - target_entry = None 451 - target_username = None 452 - for ref in (self.links_data or {}).get("references", []): 453 - if ref.get("target_url") == url: 454 - target_username = ref.get("target_username") 455 - if ref.get("target_entry_id"): 456 - target_entry = self._find_entry_by_id(ref["target_entry_id"]) 457 - break 458 - 459 - shared_refs.append({ 460 - "url": url, 461 - "count": count, 462 - "referencing_entries": referencing_entries[url], 463 - "target_username": target_username, 464 - "target_title": target_entry.title if target_entry else None 465 - }) 466 - 467 - return sorted(shared_refs, key=lambda x: x["count"], reverse=True) 468 - 469 - def generate_site(self) -> None: 470 - """Generate the static website.""" 471 - # Create output directory 472 - self.output_dir.mkdir(parents=True, exist_ok=True) 473 - 474 - # Create static directories 475 - (self.output_dir / "css").mkdir(exist_ok=True) 476 - (self.output_dir / "js").mkdir(exist_ok=True) 477 - 478 - # Generate CSS 479 - css_template = self.env.get_template("style.css") 480 - css_content = css_template.render() 481 - with open(self.output_dir / "css" / "style.css", "w") as f: 482 - f.write(css_content) 483 - 484 - # Generate JavaScript 485 - js_template = self.env.get_template("script.js") 486 - js_content = js_template.render() 487 - with open(self.output_dir / "js" / "script.js", "w") as f: 488 - f.write(js_content) 489 - 490 - # Prepare common template data 491 - base_data = { 492 - "title": "Energy & Environment Group", 493 - "generated_at": datetime.now().isoformat(), 494 - "get_display_name": self.get_display_name, 495 - "get_user_homepage": self.get_user_homepage, 496 - "clean_html_summary": self.clean_html_summary, 497 - "safe_anchor_id": safe_anchor_id, 498 - } 499 - 500 - # Build unified timeline 501 - timeline_items = [] 502 - 503 - # Only consider the threads that will 
actually be displayed 504 - displayed_threads = self.threads[:20] # Limit to 20 threads 505 - 506 - # Track which entries are part of displayed threads 507 - threaded_entry_ids = set() 508 - for thread in displayed_threads: 509 - for item in thread: 510 - threaded_entry_ids.add(item["entry_id"]) 511 - 512 - # Add threads to timeline (using the date of the most recent post) 513 - for thread in displayed_threads: 514 - most_recent_date = max( 515 - item["entry"].updated or item["entry"].published or datetime.min 516 - for item in thread 517 - ) 518 - timeline_items.append({ 519 - "type": "thread", 520 - "date": most_recent_date, 521 - "content": thread 522 - }) 523 - 524 - # Add individual posts (not in threads) 525 - for username, entry in self.entries[:50]: 526 - if entry.id not in threaded_entry_ids: 527 - # Check if this entry has references 528 - has_refs = ( 529 - entry.id in self.reference_details 530 - if hasattr(self, "reference_details") 531 - else False 532 - ) 533 - 534 - refs = [] 535 - if has_refs: 536 - refs = self.reference_details.get(entry.id, []) 537 - refs = [ 538 - r for r in refs 539 - if r.get("target_id") != entry.id 540 - and r.get("source_id") != entry.id 541 - ] 542 - 543 - timeline_items.append({ 544 - "type": "post", 545 - "date": entry.updated or entry.published or datetime.min, 546 - "content": { 547 - "username": username, 548 - "display_name": self.get_display_name(username), 549 - "entry": entry, 550 - "references": refs if refs else None 551 - } 552 - }) 553 - 554 - # Sort unified timeline by date (newest first) 555 - timeline_items.sort(key=lambda x: x["date"], reverse=True) 556 - 557 - # Limit timeline to what will actually be rendered 558 - timeline_items = timeline_items[:50] # Limit to 50 items total 559 - 560 - # Add cross-thread linking for repeat blog references 561 - self._add_cross_thread_links(timeline_items) 562 - 563 - # Prepare outgoing links data 564 - outgoing_links = [] 565 - if self.links_data and "links" in 
self.links_data: 566 - for url, link_info in self.links_data["links"].items(): 567 - referencing_entries = [] 568 - for entry_id in link_info.get("referencing_entries", []): 569 - for username, entry in self.entries: 570 - if entry.id == entry_id: 571 - referencing_entries.append( 572 - (self.get_display_name(username), entry) 573 - ) 574 - break 575 - 576 - if referencing_entries: 577 - # Sort by date - prioritize updated over published 578 - referencing_entries.sort( 579 - key=lambda x: x[1].updated or x[1].published or datetime.min, reverse=True 580 - ) 581 - outgoing_links.append( 582 - { 583 - "url": url, 584 - "target_username": link_info.get("target_username"), 585 - "entries": referencing_entries, 586 - } 587 - ) 588 - 589 - # Sort links by most recent reference - prioritize updated over published 590 - outgoing_links.sort( 591 - key=lambda x: x["entries"][0][1].updated 592 - or x["entries"][0][1].published or datetime.min, 593 - reverse=True, 594 - ) 595 - 596 - # Prepare users data 597 - users: list[UserData] = [] 598 - if self.index: 599 - for username, user_metadata in self.index.users.items(): 600 - # Get recent entries for this user with display names 601 - user_entries = [ 602 - (self.get_display_name(u), e) 603 - for u, e in self.entries 604 - if u == username 605 - ][:5] 606 - users.append( 607 - {"metadata": user_metadata, "recent_entries": user_entries} 608 - ) 609 - # Sort by entry count 610 - users.sort(key=lambda x: x["metadata"].entry_count, reverse=True) 611 - 612 - # Generate timeline page 613 - timeline_template = self.env.get_template("timeline.html") 614 - timeline_content = timeline_template.render( 615 - **base_data, 616 - page="timeline", 617 - timeline_items=timeline_items, # Already limited above 618 - ) 619 - with open(self.output_dir / "timeline.html", "w") as f: 620 - f.write(timeline_content) 621 - 622 - # Generate links page 623 - links_template = self.env.get_template("links.html") 624 - links_content = links_template.render( 
625 - **base_data, 626 - page="links", 627 - outgoing_links=outgoing_links[:100], 628 - ) 629 - with open(self.output_dir / "links.html", "w") as f: 630 - f.write(links_content) 631 - 632 - # Generate users page 633 - users_template = self.env.get_template("users.html") 634 - users_content = users_template.render( 635 - **base_data, 636 - page="users", 637 - users=users, 638 - ) 639 - with open(self.output_dir / "users.html", "w") as f: 640 - f.write(users_content) 641 - 642 - # Generate main index page (redirect to timeline) 643 - index_template = self.env.get_template("index.html") 644 - index_content = index_template.render(**base_data) 645 - with open(self.output_dir / "index.html", "w") as f: 646 - f.write(index_content) 647 - 648 - console.print(f"[green]✓[/green] Generated website at {self.output_dir}") 649 - console.print(f" - {len(self.entries)} entries") 650 - console.print(f" - {len(self.threads)} conversation threads") 651 - console.print(f" - {len(outgoing_links)} outgoing links") 652 - console.print(f" - {len(users)} users") 653 - console.print( 654 - " - Generated pages: index.html, timeline.html, links.html, users.html" 655 - ) 656 11 657 12 658 13 @app.command() ··· 663 18 "-o", 664 19 help="Output directory for the generated website", 665 20 ), 666 - force: bool = typer.Option( 667 - False, "--force", "-f", help="Overwrite existing output directory" 21 + template_dir: Optional[Path] = typer.Option( 22 + None, "--templates", help="Custom template directory" 668 23 ), 669 - config_file: Path = typer.Option( 670 - Path("thicket.yaml"), "--config", help="Configuration file path" 24 + config_file: Optional[Path] = typer.Option( 25 + None, "--config", help="Configuration file path" 671 26 ), 672 27 ) -> None: 673 28 """Generate a static HTML website from thicket data.""" 674 - config = load_config(config_file) 675 - 676 - if not config.git_store: 677 - console.print("[red]No git store path configured[/red]") 678 - raise typer.Exit(1) 679 - 680 - 
git_store = GitStore(config.git_store) 681 - 682 - # Check if output directory exists 683 - if output.exists() and not force: 684 - console.print( 685 - f"[red]Output directory {output} already exists. Use --force to overwrite.[/red]" 686 - ) 687 - raise typer.Exit(1) 688 - 689 - # Clean output directory if forcing 690 - if output.exists() and force: 691 - shutil.rmtree(output) 692 - 29 + 693 30 try: 694 - generator = WebsiteGenerator(git_store, output) 695 - 696 - console.print("[bold]Generating static website...[/bold]") 697 - generator.load_data() 698 - generator.build_threads() 699 - generator.generate_site() 700 - 31 + # Load Thicket instance 32 + thicket = load_thicket(config_file) 33 + 34 + console.print(f"[blue]Generating static site to:[/blue] {output}") 35 + 36 + # Generate the complete site 37 + if thicket.generate_site(output, template_dir): 38 + console.print(f"[green]✓[/green] Successfully generated site at {output}") 39 + 40 + # Show what was generated 41 + stats = thicket.get_stats() 42 + console.print(f" • {stats.get('total_entries', 0)} entries") 43 + console.print(f" • {stats.get('total_users', 0)} users") 44 + console.print(f" • {stats.get('unique_urls', 0)} unique links") 45 + 46 + # List generated files 47 + if output.exists(): 48 + html_files = list(output.glob("*.html")) 49 + if html_files: 50 + console.print(" • Generated pages:") 51 + for html_file in sorted(html_files): 52 + console.print(f" - {html_file.name}") 53 + else: 54 + console.print("[red]✗[/red] Failed to generate site") 55 + raise typer.Exit(1) 56 + 701 57 except Exception as e: 702 - console.print(f"[red]Error generating website: {e}[/red]") 703 - raise typer.Exit(1) from e 58 + console.print(f"[red]Error:[/red] {str(e)}") 59 + raise typer.Exit(1)
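The removed generator code above rendered four Jinja2 templates (`index`, `timeline`, `links`, `users`) and wrote each to the output directory. A minimal sketch of that per-page write loop, with stdlib `string.Template` standing in for the Jinja2 environment the real command uses (`generate_pages` and `PAGES` are illustrative names, not part of the repo):

```python
# Sketch of the per-page render-and-write loop the generate command performs.
# string.Template stands in for Jinja2; page names mirror the diff above.
from pathlib import Path
from string import Template

PAGES = {
    "index.html": "<h1>$title</h1>",
    "timeline.html": "<h1>$title - timeline</h1>",
    "links.html": "<h1>$title - links</h1>",
    "users.html": "<h1>$title - users</h1>",
}

def generate_pages(output_dir: Path, title: str) -> list[Path]:
    """Render every page template and write it under output_dir."""
    output_dir.mkdir(parents=True, exist_ok=True)
    written = []
    for name, tmpl in PAGES.items():
        path = output_dir / name
        path.write_text(Template(tmpl).substitute(title=title))
        written.append(path)
    return written
```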
+50 -39
src/thicket/cli/commands/init.py
··· 1 1 """Initialize command for thicket.""" 2 2 3 + import yaml 3 4 from pathlib import Path 4 5 from typing import Optional 5 6 6 7 import typer 7 - from pydantic import ValidationError 8 8 9 - from ...core.git_store import GitStore 9 + from ..main import app, console, get_config_path 10 10 from ...models import ThicketConfig 11 - from ..main import app 12 - from ..utils import print_error, print_success, save_config 11 + from ... import Thicket 13 12 14 13 15 14 @app.command() ··· 19 18 None, "--cache-dir", "-c", help="Cache directory (default: ~/.cache/thicket)" 20 19 ), 21 20 config_file: Optional[Path] = typer.Option( 22 - None, "--config", help="Configuration file path (default: thicket.yaml)" 21 + None, "--config", help="Configuration file path (default: ~/.config/thicket/config.yaml)" 23 22 ), 24 23 force: bool = typer.Option( 25 24 False, "--force", "-f", help="Overwrite existing configuration" ··· 29 28 30 29 # Set default paths 31 30 if cache_dir is None: 32 - from platformdirs import user_cache_dir 33 - cache_dir = Path(user_cache_dir("thicket")) 31 + cache_dir = Path.home() / ".cache" / "thicket" 34 32 35 33 if config_file is None: 36 - config_file = Path("thicket.yaml") 34 + config_file = get_config_path() 37 35 38 36 # Check if config already exists 39 37 if config_file.exists() and not force: 40 - print_error(f"Configuration file already exists: {config_file}") 41 - print_error("Use --force to overwrite") 38 + console.print(f"[red]Configuration file already exists:[/red] {config_file}") 39 + console.print("Use --force to overwrite") 42 40 raise typer.Exit(1) 43 41 44 - # Create cache directory 45 - cache_dir.mkdir(parents=True, exist_ok=True) 42 + try: 43 + # Create directories 44 + git_store.mkdir(parents=True, exist_ok=True) 45 + cache_dir.mkdir(parents=True, exist_ok=True) 46 + config_file.parent.mkdir(parents=True, exist_ok=True) 46 47 47 - # Create Git store 48 - try: 49 - GitStore(git_store) 50 - print_success(f"Initialized Git store at: 
{git_store}") 51 - except Exception as e: 52 - print_error(f"Failed to initialize Git store: {e}") 53 - raise typer.Exit(1) from e 48 + # Create Thicket instance with minimal config 49 + thicket = Thicket.create(git_store, cache_dir) 50 + 51 + # Initialize the repository 52 + if thicket.init_repository(): 53 + console.print(f"[green]✓[/green] Initialized Git store at: {git_store}") 54 + else: 55 + console.print(f"[red]✗[/red] Failed to initialize Git store") 56 + raise typer.Exit(1) 57 + 58 + # Save configuration 59 + config_data = { 60 + 'git_store': str(git_store), 61 + 'cache_dir': str(cache_dir), 62 + 'users': [] 63 + } 64 + 65 + with open(config_file, 'w') as f: 66 + yaml.dump(config_data, f, default_flow_style=False) 67 + 68 + console.print(f"[green]✓[/green] Created configuration file: {config_file}") 54 69 55 - # Create configuration 56 - try: 57 - config = ThicketConfig( 58 - git_store=git_store, 59 - cache_dir=cache_dir, 60 - users=[] 61 - ) 70 + # Create initial commit 71 + if thicket.commit_changes("Initialize thicket repository"): 72 + console.print("[green]✓[/green] Created initial commit") 62 73 63 - save_config(config, config_file) 64 - print_success(f"Created configuration file: {config_file}") 74 + console.print("\n[green]Thicket initialized successfully![/green]") 75 + console.print(f" • Git store: {git_store}") 76 + console.print(f" • Cache directory: {cache_dir}") 77 + console.print(f" • Configuration: {config_file}") 78 + console.print("\n[blue]Next steps:[/blue]") 79 + console.print(" 1. Add your first user and feed:") 80 + console.print(f" [cyan]thicket add username https://example.com/feed.xml[/cyan]") 81 + console.print(" 2. Sync feeds:") 82 + console.print(f" [cyan]thicket sync[/cyan]") 83 + console.print(" 3. 
Generate a website:") 84 + console.print(f" [cyan]thicket generate[/cyan]") 65 85 66 - except ValidationError as e: 67 - print_error(f"Invalid configuration: {e}") 68 - raise typer.Exit(1) from e 69 86 except Exception as e: 70 - print_error(f"Failed to create configuration: {e}") 71 - raise typer.Exit(1) from e 72 - 73 - print_success("Thicket initialized successfully!") 74 - print_success(f"Git store: {git_store}") 75 - print_success(f"Cache directory: {cache_dir}") 76 - print_success(f"Configuration: {config_file}") 77 - print_success("Run 'thicket add user' to add your first user and feed.") 87 + console.print(f"[red]Error:[/red] {str(e)}") 88 + raise typer.Exit(1)
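The new `init` command writes a three-key mapping (`git_store`, `cache_dir`, `users`) as the configuration file. A round-trip sketch of that structure, using JSON instead of the YAML writer in the diff so the example stays stdlib-only (`write_config`/`read_config` are illustrative helpers, not repo functions; `ThicketConfig.from_file` also accepts `.json`):

```python
# The config dict init persists: git_store, cache_dir, and an empty users list.
# json stands in for yaml.dump here to avoid a third-party dependency.
import json
from pathlib import Path

def write_config(config_file: Path, git_store: Path, cache_dir: Path) -> None:
    """Write the minimal config mapping, creating parent dirs as init does."""
    config_file.parent.mkdir(parents=True, exist_ok=True)
    config_data = {
        "git_store": str(git_store),
        "cache_dir": str(cache_dir),
        "users": [],
    }
    config_file.write_text(json.dumps(config_data, indent=2))

def read_config(config_file: Path) -> dict:
    """Read the mapping back."""
    return json.loads(config_file.read_text())
```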
+75 -121
src/thicket/cli/commands/sync.py
··· 5 5 from typing import Optional 6 6 7 7 import typer 8 - from rich.progress import track 8 + from rich.progress import Progress, SpinnerColumn, TextColumn 9 9 10 - from ...core.feed_parser import FeedParser 11 - from ...core.git_store import GitStore 12 - from ..main import app 13 - from ..utils import ( 14 - load_config, 15 - print_error, 16 - print_info, 17 - print_success, 18 - ) 10 + from ..main import app, console, load_thicket 19 11 20 12 21 13 @app.command() 22 14 def sync( 23 - all_users: bool = typer.Option( 24 - False, "--all", "-a", help="Sync all users and feeds" 25 - ), 26 15 user: Optional[str] = typer.Option( 27 - None, "--user", "-u", help="Sync specific user only" 16 + None, "--user", "-u", help="Sync specific user only (default: all users)" 28 17 ), 29 18 config_file: Optional[Path] = typer.Option( 30 - Path("thicket.yaml"), "--config", help="Configuration file path" 19 + None, "--config", help="Configuration file path" 31 20 ), 32 - dry_run: bool = typer.Option( 33 - False, "--dry-run", help="Show what would be synced without making changes" 21 + commit: bool = typer.Option( 22 + True, "--commit/--no-commit", help="Commit changes after sync" 34 23 ), 35 24 ) -> None: 36 25 """Sync feeds and store entries in Git repository.""" 37 - 38 - # Load configuration 39 - config = load_config(config_file) 40 - 41 - # Initialize Git store 42 - git_store = GitStore(config.git_store) 43 - 44 - # Determine which users to sync from git repository 45 - users_to_sync = [] 46 - if all_users: 47 - index = git_store._load_index() 48 - users_to_sync = list(index.users.values()) 49 - elif user: 50 - user_metadata = git_store.get_user(user) 51 - if not user_metadata: 52 - print_error(f"User '{user}' not found in git repository") 53 - raise typer.Exit(1) 54 - users_to_sync = [user_metadata] 55 - else: 56 - print_error("Specify --all to sync all users or --user to sync a specific user") 57 - raise typer.Exit(1) 58 - 59 - if not users_to_sync: 60 - print_info("No users 
configured to sync") 61 - return 62 - 63 - # Sync each user 64 - total_new_entries = 0 65 - total_updated_entries = 0 66 - 67 - for user_metadata in users_to_sync: 68 - print_info(f"Syncing user: {user_metadata.username}") 69 - 70 - user_new_entries = 0 71 - user_updated_entries = 0 72 - 73 - # Sync each feed for the user 74 - for feed_url in track(user_metadata.feeds, description=f"Syncing {user_metadata.username}'s feeds"): 75 - try: 76 - new_entries, updated_entries = asyncio.run( 77 - sync_feed(git_store, user_metadata.username, feed_url, dry_run) 78 - ) 79 - user_new_entries += new_entries 80 - user_updated_entries += updated_entries 81 - 82 - except Exception as e: 83 - print_error(f"Failed to sync feed {feed_url}: {e}") 84 - continue 85 - 86 - print_info(f"User {user_metadata.username}: {user_new_entries} new, {user_updated_entries} updated") 87 - total_new_entries += user_new_entries 88 - total_updated_entries += user_updated_entries 89 - 90 - # Commit changes if not dry run 91 - if not dry_run and (total_new_entries > 0 or total_updated_entries > 0): 92 - commit_message = f"Sync feeds: {total_new_entries} new entries, {total_updated_entries} updated" 93 - git_store.commit_changes(commit_message) 94 - print_success(f"Committed changes: {commit_message}") 95 - 96 - # Summary 97 - if dry_run: 98 - print_info(f"Dry run complete: would sync {total_new_entries} new entries, {total_updated_entries} updated") 99 - else: 100 - print_success(f"Sync complete: {total_new_entries} new entries, {total_updated_entries} updated") 101 - 102 - 103 - async def sync_feed(git_store: GitStore, username: str, feed_url, dry_run: bool) -> tuple[int, int]: 104 - """Sync a single feed for a user.""" 105 - 106 - parser = FeedParser() 107 - 26 + 108 27 try: 109 - # Fetch and parse feed 110 - content = await parser.fetch_feed(feed_url) 111 - metadata, entries = parser.parse_feed(content, feed_url) 112 - 113 - new_entries = 0 114 - updated_entries = 0 115 - 116 - # Process each entry 
117 - for entry in entries: 118 - try: 119 - # Check if entry already exists 120 - existing_entry = git_store.get_entry(username, entry.id) 121 - 122 - if existing_entry: 123 - # Check if entry has been updated 124 - if existing_entry.updated != entry.updated: 125 - if not dry_run: 126 - git_store.store_entry(username, entry) 127 - updated_entries += 1 128 - else: 129 - # New entry 130 - if not dry_run: 131 - git_store.store_entry(username, entry) 132 - new_entries += 1 133 - 134 - except Exception as e: 135 - print_error(f"Failed to process entry {entry.id}: {e}") 136 - continue 137 - 138 - return new_entries, updated_entries 139 - 28 + # Load Thicket instance 29 + thicket = load_thicket(config_file) 30 + 31 + # Progress callback for tracking 32 + current_task = None 33 + 34 + def progress_callback(message: str, current: int = 0, total: int = 0): 35 + nonlocal current_task 36 + current_task = message 37 + if total > 0: 38 + console.print(f"[blue]Progress:[/blue] {message} ({current}/{total})") 39 + else: 40 + console.print(f"[blue]Info:[/blue] {message}") 41 + 42 + # Run sync with progress 43 + with Progress( 44 + SpinnerColumn(), 45 + TextColumn("[progress.description]{task.description}"), 46 + console=console, 47 + transient=True, 48 + ) as progress: 49 + task = progress.add_task("Syncing feeds...", total=None) 50 + 51 + # Perform sync 52 + results = asyncio.run(thicket.sync_feeds(user, progress_callback)) 53 + 54 + progress.remove_task(task) 55 + 56 + # Process results 57 + total_new = 0 58 + total_processed = 0 59 + errors = [] 60 + 61 + if isinstance(results, dict): 62 + for username, user_results in results.items(): 63 + if 'error' in user_results: 64 + errors.append(f"{username}: {user_results['error']}") 65 + continue 66 + 67 + total_new += user_results.get('new_entries', 0) 68 + total_processed += user_results.get('feeds_processed', 0) 69 + 70 + console.print(f"[green]✓[/green] {username}: {user_results.get('new_entries', 0)} new entries from 
{user_results.get('feeds_processed', 0)} feeds") 71 + 72 + # Show any feed-specific errors 73 + for error in user_results.get('errors', []): 74 + console.print(f" [yellow]Warning:[/yellow] {error}") 75 + 76 + # Show errors 77 + for error in errors: 78 + console.print(f"[red]Error:[/red] {error}") 79 + 80 + # Commit changes if requested 81 + if commit and total_new > 0: 82 + commit_message = f"Sync feeds: {total_new} new entries from {total_processed} feeds" 83 + if thicket.commit_changes(commit_message): 84 + console.print(f"[green]✓[/green] Committed: {commit_message}") 85 + else: 86 + console.print("[red]✗[/red] Failed to commit changes") 87 + 88 + # Summary 89 + if total_new > 0: 90 + console.print(f"\n[green]Sync complete:[/green] {total_new} new entries processed") 91 + else: 92 + console.print("\n[blue]Sync complete:[/blue] No new entries found") 93 + 140 94 except Exception as e: 141 - print_error(f"Failed to sync feed {feed_url}: {e}") 142 - return 0, 0 95 + console.print(f"[red]Error:[/red] {str(e)}") 96 + raise typer.Exit(1)
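The rewritten `sync` command folds the per-user result dicts returned by `thicket.sync_feeds` into totals, routing any user-level `'error'` entries into a separate list. That aggregation can be sketched on its own (`summarize` is an illustrative name; the dict keys follow the diff above):

```python
# Fold per-user sync results into (new entries, feeds processed, errors),
# mirroring the loop in the rewritten sync command.
def summarize(results: dict) -> tuple[int, int, list[str]]:
    total_new = total_processed = 0
    errors = []
    for username, user_results in results.items():
        if "error" in user_results:
            # A user-level failure short-circuits that user's tallies.
            errors.append(f"{username}: {user_results['error']}")
            continue
        total_new += user_results.get("new_entries", 0)
        total_processed += user_results.get("feeds_processed", 0)
    return total_new, total_processed, errors
```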
+35 -1
src/thicket/cli/main.py
··· 1 1 """Main CLI application using Typer.""" 2 2 3 + from pathlib import Path 4 + from typing import Optional 5 + 3 6 import typer 4 7 from rich.console import Console 5 8 6 - from .. import __version__ 9 + from .. import __version__, Thicket, ThicketConfig 7 10 8 11 app = typer.Typer( 9 12 name="thicket", ··· 23 26 if value: 24 27 console.print(f"thicket version {__version__}") 25 28 raise typer.Exit() 29 + 30 + 31 + def load_thicket(config_path: Optional[Path] = None) -> Thicket: 32 + """Load Thicket instance from configuration.""" 33 + if config_path and config_path.exists(): 34 + return Thicket.from_config_file(config_path) 35 + 36 + # Try default locations 37 + default_paths = [ 38 + Path("thicket.yaml"), 39 + Path("thicket.yml"), 40 + Path("thicket.json"), 41 + Path.home() / ".config" / "thicket" / "config.yaml", 42 + Path.home() / ".thicket.yaml", 43 + ] 44 + 45 + for path in default_paths: 46 + if path.exists(): 47 + return Thicket.from_config_file(path) 48 + 49 + # No config found 50 + console.print("[red]Error:[/red] No configuration file found.") 51 + console.print("Use [bold]thicket init[/bold] to create a new configuration or specify --config") 52 + raise typer.Exit(1) 53 + 54 + 55 + def get_config_path() -> Path: 56 + """Get the default configuration path for new configs.""" 57 + config_dir = Path.home() / ".config" / "thicket" 58 + config_dir.mkdir(parents=True, exist_ok=True) 59 + return config_dir / "config.yaml" 26 60 27 61 28 62 @app.callback()
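`load_thicket` resolves configuration by preferring an explicit `--config` path and otherwise taking the first existing candidate from a fixed list. A stdlib sketch of that first-match search (`find_config` is an illustrative helper; the real function raises `typer.Exit` when nothing is found):

```python
# First-existing-path-wins resolution, as load_thicket performs it.
from pathlib import Path
from typing import Optional

def find_config(explicit: Optional[Path], candidates: list[Path]) -> Optional[Path]:
    """Return the explicit path if it exists, else the first existing candidate."""
    if explicit is not None and explicit.exists():
        return explicit
    for path in candidates:
        if path.exists():
            return path
    return None
```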
+30 -2
src/thicket/models/config.py
··· 1 1 """Configuration models for thicket.""" 2 2 3 + import json 4 + import yaml 3 5 from pathlib import Path 4 - from typing import Optional 6 + from typing import Optional, Union 5 7 6 - from pydantic import BaseModel, EmailStr, HttpUrl 8 + from pydantic import BaseModel, EmailStr, HttpUrl, ValidationError 7 9 from pydantic_settings import BaseSettings, SettingsConfigDict 8 10 9 11 ··· 31 33 git_store: Path 32 34 cache_dir: Path 33 35 users: list[UserConfig] = [] 36 + 37 + @classmethod 38 + def from_file(cls, config_path: Path) -> 'ThicketConfig': 39 + """Load configuration from a file.""" 40 + if not config_path.exists(): 41 + raise FileNotFoundError(f"Configuration file not found: {config_path}") 42 + 43 + content = config_path.read_text(encoding='utf-8') 44 + 45 + if config_path.suffix.lower() in ['.yaml', '.yml']: 46 + try: 47 + data = yaml.safe_load(content) 48 + except yaml.YAMLError as e: 49 + raise ValueError(f"Invalid YAML in {config_path}: {e}") 50 + elif config_path.suffix.lower() == '.json': 51 + try: 52 + data = json.loads(content) 53 + except json.JSONDecodeError as e: 54 + raise ValueError(f"Invalid JSON in {config_path}: {e}") 55 + else: 56 + raise ValueError(f"Unsupported configuration file format: {config_path.suffix}") 57 + 58 + try: 59 + return cls(**data) 60 + except ValidationError as e: 61 + raise ValueError(f"Configuration validation error: {e}")
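`ThicketConfig.from_file` dispatches on the file extension, matched case-insensitively: `.yaml`/`.yml` parse as YAML, `.json` as JSON, anything else is rejected. The dispatch step in isolation (`config_format` is an illustrative helper):

```python
# Extension-based format dispatch, as in ThicketConfig.from_file.
from pathlib import Path

def config_format(config_path: Path) -> str:
    """Classify a config file by suffix, case-insensitively."""
    suffix = config_path.suffix.lower()
    if suffix in (".yaml", ".yml"):
        return "yaml"
    if suffix == ".json":
        return "json"
    raise ValueError(f"Unsupported configuration file format: {suffix}")
```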
+1
src/thicket/subsystems/__init__.py
··· 1 + """Thicket subsystems for specialized operations."""
+227
src/thicket/subsystems/feeds.py
··· 1 + """Feed management subsystem.""" 2 + 3 + import asyncio 4 + import json 5 + from datetime import datetime 6 + from pathlib import Path 7 + from typing import Callable, Optional 8 + 9 + from pydantic import HttpUrl 10 + 11 + from ..core.feed_parser import FeedParser 12 + from ..core.git_store import GitStore 13 + from ..models import AtomEntry, ThicketConfig 14 + 15 + 16 + class FeedManager: 17 + """Manages feed operations and caching.""" 18 + 19 + def __init__(self, git_store: GitStore, feed_parser: FeedParser, config: ThicketConfig): 20 + """Initialize feed manager.""" 21 + self.git_store = git_store 22 + self.feed_parser = feed_parser 23 + self.config = config 24 + self._ensure_cache_dir() 25 + 26 + def _ensure_cache_dir(self): 27 + """Ensure cache directory exists.""" 28 + self.config.cache_dir.mkdir(parents=True, exist_ok=True) 29 + 30 + async def sync_feeds(self, username: Optional[str] = None, progress_callback: Optional[Callable] = None) -> dict: 31 + """Sync feeds for all users or specific user.""" 32 + if username: 33 + return await self.sync_user_feeds(username, progress_callback) 34 + 35 + # Sync all users 36 + results = {} 37 + total_users = len(self.config.users) 38 + 39 + for i, user_config in enumerate(self.config.users): 40 + if progress_callback: 41 + progress_callback(f"Syncing feeds for {user_config.username}", i, total_users) 42 + 43 + user_results = await self.sync_user_feeds(user_config.username, progress_callback) 44 + results[user_config.username] = user_results 45 + 46 + return results 47 + 48 + async def sync_user_feeds(self, username: str, progress_callback: Optional[Callable] = None) -> dict: 49 + """Sync feeds for a specific user.""" 50 + user_config = next((u for u in self.config.users if u.username == username), None) 51 + if not user_config: 52 + return {'error': f'User {username} not found in configuration'} 53 + 54 + # Ensure user exists in git store 55 + git_user = self.git_store.get_user(username) 56 + if not git_user: 57 
+ self.git_store.add_user( 58 + username=user_config.username, 59 + display_name=user_config.display_name, 60 + email=str(user_config.email) if user_config.email else None, 61 + homepage=str(user_config.homepage) if user_config.homepage else None, 62 + icon=str(user_config.icon) if user_config.icon else None, 63 + feeds=[str(feed) for feed in user_config.feeds] 64 + ) 65 + 66 + results = { 67 + 'username': username, 68 + 'feeds_processed': 0, 69 + 'new_entries': 0, 70 + 'errors': [], 71 + 'feeds': {} 72 + } 73 + 74 + total_feeds = len(user_config.feeds) 75 + 76 + for i, feed_url in enumerate(user_config.feeds): 77 + if progress_callback: 78 + progress_callback(f"Processing feed {i+1}/{total_feeds} for {username}", i, total_feeds) 79 + 80 + try: 81 + feed_result = await self._sync_single_feed(username, feed_url) 82 + results['feeds'][str(feed_url)] = feed_result 83 + results['feeds_processed'] += 1 84 + results['new_entries'] += feed_result.get('new_entries', 0) 85 + except Exception as e: 86 + error_msg = f"Error syncing {feed_url}: {str(e)}" 87 + results['errors'].append(error_msg) 88 + results['feeds'][str(feed_url)] = {'error': error_msg} 89 + 90 + return results 91 + 92 + async def _sync_single_feed(self, username: str, feed_url: HttpUrl) -> dict: 93 + """Sync a single feed for a user.""" 94 + cache_key = self._get_cache_key(username, feed_url) 95 + last_modified = self._get_last_modified(cache_key) 96 + 97 + try: 98 + # Fetch feed content 99 + content = await self.feed_parser.fetch_feed(feed_url) 100 + 101 + # Parse feed 102 + feed_meta, entries = self.feed_parser.parse_feed(content, feed_url) 103 + 104 + # Filter new entries 105 + new_entries = [] 106 + for entry in entries: 107 + existing_entry = self.git_store.get_entry(username, entry.id) 108 + if not existing_entry: 109 + new_entries.append(entry) 110 + 111 + # Store new entries 112 + stored_count = 0 113 + for entry in new_entries: 114 + if self.git_store.store_entry(username, entry): 115 + stored_count 
+= 1 116 + 117 + # Update cache 118 + self._update_cache(cache_key, { 119 + 'last_fetched': datetime.now().isoformat(), 120 + 'feed_meta': feed_meta.model_dump(exclude_none=True), 121 + 'entry_count': len(entries), 122 + 'new_entries': stored_count, 123 + 'feed_url': str(feed_url) 124 + }) 125 + 126 + return { 127 + 'success': True, 128 + 'total_entries': len(entries), 129 + 'new_entries': stored_count, 130 + 'feed_title': feed_meta.title, 131 + 'last_fetched': datetime.now().isoformat() 132 + } 133 + 134 + except Exception as e: 135 + return { 136 + 'success': False, 137 + 'error': str(e), 138 + 'feed_url': str(feed_url) 139 + } 140 + 141 + def get_entries(self, username: str, limit: Optional[int] = None) -> list[AtomEntry]: 142 + """Get entries for a user.""" 143 + return self.git_store.list_entries(username, limit) 144 + 145 + def get_entry(self, username: str, entry_id: str) -> Optional[AtomEntry]: 146 + """Get a specific entry.""" 147 + return self.git_store.get_entry(username, entry_id) 148 + 149 + def search_entries(self, query: str, username: Optional[str] = None, limit: Optional[int] = None) -> list[tuple[str, AtomEntry]]: 150 + """Search entries across users.""" 151 + return self.git_store.search_entries(query, username, limit) 152 + 153 + def get_stats(self) -> dict: 154 + """Get feed-related statistics.""" 155 + index = self.git_store._load_index() 156 + 157 + feed_stats = { 158 + 'total_feeds_configured': sum(len(user.feeds) for user in self.config.users), 159 + 'users_with_entries': len([u for u in index.users.values() if u.entry_count > 0]), 160 + 'cache_files': len(list(self.config.cache_dir.glob("*.json"))) if self.config.cache_dir.exists() else 0, 161 + } 162 + 163 + return feed_stats 164 + 165 + def _get_cache_key(self, username: str, feed_url: HttpUrl) -> str: 166 + """Generate cache key for feed.""" 167 + # Simple hash of username and feed URL 168 + import hashlib 169 + key_data = f"{username}:{str(feed_url)}" 170 + return 
hashlib.md5(key_data.encode()).hexdigest() 171 + 172 + def _get_last_modified(self, cache_key: str) -> Optional[datetime]: 173 + """Get last modified time from cache.""" 174 + cache_file = self.config.cache_dir / f"{cache_key}.json" 175 + if cache_file.exists(): 176 + try: 177 + with open(cache_file) as f: 178 + data = json.load(f) 179 + return datetime.fromisoformat(data.get('last_fetched', '')) 180 + except Exception: 181 + pass 182 + return None 183 + 184 + def _update_cache(self, cache_key: str, data: dict): 185 + """Update cache with feed data.""" 186 + cache_file = self.config.cache_dir / f"{cache_key}.json" 187 + try: 188 + with open(cache_file, 'w') as f: 189 + json.dump(data, f, indent=2) 190 + except Exception: 191 + # Cache update failure shouldn't break the sync 192 + pass 193 + 194 + def clear_cache(self, username: Optional[str] = None) -> bool: 195 + """Clear feed cache.""" 196 + try: 197 + if username: 198 + # Clear cache for specific user 199 + for user_config in self.config.users: 200 + if user_config.username == username: 201 + for feed_url in user_config.feeds: 202 + cache_key = self._get_cache_key(username, feed_url) 203 + cache_file = self.config.cache_dir / f"{cache_key}.json" 204 + if cache_file.exists(): 205 + cache_file.unlink() 206 + else: 207 + # Clear all cache 208 + if self.config.cache_dir.exists(): 209 + for cache_file in self.config.cache_dir.glob("*.json"): 210 + cache_file.unlink() 211 + return True 212 + except Exception: 213 + return False 214 + 215 + def get_feed_info(self, username: str, feed_url: str) -> Optional[dict]: 216 + """Get cached information about a specific feed.""" 217 + try: 218 + feed_url_obj = HttpUrl(feed_url) 219 + cache_key = self._get_cache_key(username, feed_url_obj) 220 + cache_file = self.config.cache_dir / f"{cache_key}.json" 221 + 222 + if cache_file.exists(): 223 + with open(cache_file) as f: 224 + return json.load(f) 225 + except Exception: 226 + pass 227 + return None
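`FeedManager._get_cache_key` derives each cache filename from the md5 hex digest of `"username:feed_url"`, giving a stable, filesystem-safe key per (user, feed) pair. The derivation as a standalone sketch (`cache_key` is an illustrative name for the private method):

```python
# Stable per-(user, feed) cache key, as FeedManager._get_cache_key builds it.
# md5 is used for naming only, not for security.
import hashlib

def cache_key(username: str, feed_url: str) -> str:
    key_data = f"{username}:{feed_url}"
    return hashlib.md5(key_data.encode()).hexdigest()
```

Because the key is deterministic, repeated syncs of the same feed read and write the same `<key>.json` file under the cache directory.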
+304
src/thicket/subsystems/links.py
··· 1 + """Link processing subsystem.""" 2 + 3 + import json 4 + import re 5 + from collections import defaultdict 6 + from pathlib import Path 7 + from typing import Optional 8 + from urllib.parse import urljoin, urlparse 9 + 10 + from ..core.git_store import GitStore 11 + from ..models import AtomEntry, ThicketConfig 12 + 13 + 14 + class LinkProcessor: 15 + """Processes and manages links between entries.""" 16 + 17 + def __init__(self, git_store: GitStore, config: ThicketConfig): 18 + """Initialize link processor.""" 19 + self.git_store = git_store 20 + self.config = config 21 + self.links_file = self.git_store.repo_path / "links.json" 22 + 23 + def process_links(self, username: Optional[str] = None) -> dict: 24 + """Process and extract links from entries.""" 25 + if username: 26 + return self._process_user_links(username) 27 + 28 + # Process all users 29 + results = {} 30 + index = self.git_store._load_index() 31 + 32 + for user_metadata in index.users.values(): 33 + user_results = self._process_user_links(user_metadata.username) 34 + results[user_metadata.username] = user_results 35 + 36 + # Consolidate all links 37 + self._consolidate_links() 38 + 39 + return results 40 + 41 + def _process_user_links(self, username: str) -> dict: 42 + """Process links for a specific user.""" 43 + entries = self.git_store.list_entries(username) 44 + 45 + results = { 46 + 'username': username, 47 + 'entries_processed': 0, 48 + 'links_found': 0, 49 + 'external_links': 0, 50 + 'internal_links': 0, 51 + } 52 + 53 + links_data = self._load_links_data() 54 + 55 + for entry in entries: 56 + entry_links = self._extract_links_from_entry(entry) 57 + 58 + if entry_links: 59 + # Store links for this entry 60 + entry_key = f"{username}:{entry.id}" 61 + links_data[entry_key] = { 62 + 'entry_id': entry.id, 63 + 'username': username, 64 + 'title': entry.title, 65 + 'links': entry_links, 66 + 'processed_at': entry.updated.isoformat() if entry.updated else None, 67 + } 68 + 69 + 
results['links_found'] += len(entry_links) 70 + results['external_links'] += len([l for l in entry_links if self._is_external_link(l['url'])]) 71 + results['internal_links'] += len([l for l in entry_links if not self._is_external_link(l['url'])]) 72 + 73 + results['entries_processed'] += 1 74 + 75 + self._save_links_data(links_data) 76 + 77 + return results 78 + 79 + def _extract_links_from_entry(self, entry: AtomEntry) -> list[dict]: 80 + """Extract links from an entry's content.""" 81 + links = [] 82 + 83 + # Combine content and summary for link extraction 84 + text_content = "" 85 + if entry.content: 86 + text_content += entry.content 87 + if entry.summary: 88 + text_content += " " + entry.summary 89 + 90 + if not text_content: 91 + return links 92 + 93 + # Extract HTML links 94 + html_link_pattern = r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>([^<]*)</a>' 95 + html_matches = re.findall(html_link_pattern, text_content, re.IGNORECASE) 96 + 97 + for url, text in html_matches: 98 + # Clean up the URL 99 + url = url.strip() 100 + text = text.strip() 101 + 102 + if url and url not in ['#', 'javascript:void(0)']: 103 + # Resolve relative URLs if possible 104 + if entry.link and url.startswith('/'): 105 + base_url = str(entry.link) 106 + parsed_base = urlparse(base_url) 107 + base_domain = f"{parsed_base.scheme}://{parsed_base.netloc}" 108 + url = urljoin(base_domain, url) 109 + 110 + links.append({ 111 + 'url': url, 112 + 'text': text or url, 113 + 'type': 'html' 114 + }) 115 + 116 + # Extract markdown links 117 + markdown_link_pattern = r'\[([^\]]*)\]\(([^\)]+)\)' 118 + markdown_matches = re.findall(markdown_link_pattern, text_content) 119 + 120 + for text, url in markdown_matches: 121 + url = url.strip() 122 + text = text.strip() 123 + 124 + if url and url not in ['#']: 125 + links.append({ 126 + 'url': url, 127 + 'text': text or url, 128 + 'type': 'markdown' 129 + }) 130 + 131 + # Extract plain URLs 132 + url_pattern = r'https?://[^\s<>"]+[^\s<>".,;!?]' 133 + 
url_matches = re.findall(url_pattern, text_content) 134 + 135 + for url in url_matches: 136 + # Skip if already found as HTML or markdown link 137 + if not any(link['url'] == url for link in links): 138 + links.append({ 139 + 'url': url, 140 + 'text': url, 141 + 'type': 'plain' 142 + }) 143 + 144 + return links 145 + 146 + def _is_external_link(self, url: str) -> bool: 147 + """Check if a link is external to the configured domains.""" 148 + try: 149 + parsed = urlparse(url) 150 + domain = parsed.netloc.lower() 151 + 152 + # Check against user domains from feeds 153 + for user_config in self.config.users: 154 + for feed_url in user_config.feeds: 155 + feed_domain = urlparse(str(feed_url)).netloc.lower() 156 + if domain == feed_domain or domain.endswith(f'.{feed_domain}'): 157 + return False 158 + 159 + # Check homepage domain 160 + if user_config.homepage: 161 + homepage_domain = urlparse(str(user_config.homepage)).netloc.lower() 162 + if domain == homepage_domain or domain.endswith(f'.{homepage_domain}'): 163 + return False 164 + 165 + return True 166 + except Exception: 167 + return True 168 + 169 + def _load_links_data(self) -> dict: 170 + """Load existing links data.""" 171 + if self.links_file.exists(): 172 + try: 173 + with open(self.links_file) as f: 174 + return json.load(f) 175 + except Exception: 176 + pass 177 + return {} 178 + 179 + def _save_links_data(self, links_data: dict): 180 + """Save links data to file.""" 181 + try: 182 + with open(self.links_file, 'w') as f: 183 + json.dump(links_data, f, indent=2, ensure_ascii=False) 184 + except Exception: 185 + # Link processing failure shouldn't break the main operation 186 + pass 187 + 188 + def _consolidate_links(self): 189 + """Consolidate and create reverse link mappings.""" 190 + links_data = self._load_links_data() 191 + 192 + # Create URL to entries mapping 193 + url_mapping = defaultdict(list) 194 + 195 + for entry_key, entry_data in links_data.items(): 196 + for link in entry_data.get('links', []): 
197 + url_mapping[link['url']].append({ 198 + 'entry_key': entry_key, 199 + 'username': entry_data['username'], 200 + 'entry_id': entry_data['entry_id'], 201 + 'title': entry_data['title'], 202 + 'link_text': link['text'], 203 + 'link_type': link['type'], 204 + }) 205 + 206 + # Save URL mapping 207 + url_mapping_file = self.git_store.repo_path / "url_mapping.json" 208 + try: 209 + with open(url_mapping_file, 'w') as f: 210 + json.dump(dict(url_mapping), f, indent=2, ensure_ascii=False) 211 + except Exception: 212 + pass 213 + 214 + def get_links(self, username: Optional[str] = None) -> dict: 215 + """Get processed links.""" 216 + links_data = self._load_links_data() 217 + 218 + if username: 219 + user_links = {k: v for k, v in links_data.items() if v.get('username') == username} 220 + return user_links 221 + 222 + return links_data 223 + 224 + def find_references(self, url: str) -> list[tuple[str, AtomEntry]]: 225 + """Find entries that reference a URL.""" 226 + url_mapping_file = self.git_store.repo_path / "url_mapping.json" 227 + 228 + if not url_mapping_file.exists(): 229 + return [] 230 + 231 + try: 232 + with open(url_mapping_file) as f: 233 + url_mapping = json.load(f) 234 + 235 + references = url_mapping.get(url, []) 236 + results = [] 237 + 238 + for ref in references: 239 + entry = self.git_store.get_entry(ref['username'], ref['entry_id']) 240 + if entry: 241 + results.append((ref['username'], entry)) 242 + 243 + return results 244 + except Exception: 245 + return [] 246 + 247 + def get_stats(self) -> dict: 248 + """Get link processing statistics.""" 249 + links_data = self._load_links_data() 250 + 251 + total_entries_with_links = len(links_data) 252 + total_links = sum(len(entry_data.get('links', [])) for entry_data in links_data.values()) 253 + 254 + external_links = 0 255 + internal_links = 0 256 + 257 + for entry_data in links_data.values(): 258 + for link in entry_data.get('links', []): 259 + if self._is_external_link(link['url']): 260 + 
external_links += 1 261 + else: 262 + internal_links += 1 263 + 264 + # Count unique URLs 265 + unique_urls = set() 266 + for entry_data in links_data.values(): 267 + for link in entry_data.get('links', []): 268 + unique_urls.add(link['url']) 269 + 270 + return { 271 + 'entries_with_links': total_entries_with_links, 272 + 'total_links': total_links, 273 + 'unique_urls': len(unique_urls), 274 + 'external_links': external_links, 275 + 'internal_links': internal_links, 276 + } 277 + 278 + def get_most_referenced_urls(self, limit: int = 10) -> list[dict]: 279 + """Get most frequently referenced URLs.""" 280 + url_mapping_file = self.git_store.repo_path / "url_mapping.json" 281 + 282 + if not url_mapping_file.exists(): 283 + return [] 284 + 285 + try: 286 + with open(url_mapping_file) as f: 287 + url_mapping = json.load(f) 288 + 289 + # Count references per URL 290 + url_counts = [(url, len(refs)) for url, refs in url_mapping.items()] 291 + url_counts.sort(key=lambda x: x[1], reverse=True) 292 + 293 + results = [] 294 + for url, count in url_counts[:limit]: 295 + results.append({ 296 + 'url': url, 297 + 'reference_count': count, 298 + 'is_external': self._is_external_link(url), 299 + 'references': url_mapping[url] 300 + }) 301 + 302 + return results 303 + except Exception: 304 + return []
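The `_consolidate_links` / `get_most_referenced_urls` pair above inverts per-entry link records into a URL → referencing-entries mapping, then ranks URLs by reference count. A minimal standalone sketch of that inversion (hypothetical sample data, no `GitStore` or JSON files involved):

```python
from collections import defaultdict


def consolidate(links_data: dict) -> dict:
    """Invert per-entry link records into url -> list of referencing entries."""
    url_mapping = defaultdict(list)
    for entry_key, entry_data in links_data.items():
        for link in entry_data.get("links", []):
            url_mapping[link["url"]].append({
                "entry_key": entry_key,
                "username": entry_data["username"],
                "link_text": link["text"],
            })
    return dict(url_mapping)


def most_referenced(url_mapping: dict, limit: int = 10) -> list[tuple[str, int]]:
    """Rank URLs by number of referencing entries, descending."""
    counts = [(url, len(refs)) for url, refs in url_mapping.items()]
    counts.sort(key=lambda x: x[1], reverse=True)
    return counts[:limit]


# Hypothetical sample shaped like links.json entries
sample = {
    "alice/1": {"username": "alice",
                "links": [{"url": "https://a.example", "text": "A"}]},
    "bob/1": {"username": "bob",
              "links": [{"url": "https://a.example", "text": "A again"},
                        {"url": "https://b.example", "text": "B"}]},
}
mapping = consolidate(sample)
ranking = most_referenced(mapping)
```

Keeping the inverted mapping in a separate `url_mapping.json`, as the subsystem does, lets `find_references` answer "who links here?" with a single dict lookup instead of rescanning every entry.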
+158
src/thicket/subsystems/repository.py
··· 1 + """Repository management subsystem.""" 2 + 3 + import shutil 4 + from datetime import datetime 5 + from pathlib import Path 6 + from typing import Optional 7 + 8 + from ..core.git_store import GitStore 9 + from ..models import ThicketConfig 10 + 11 + 12 + class RepositoryManager: 13 + """Manages repository operations and metadata.""" 14 + 15 + def __init__(self, git_store: GitStore, config: ThicketConfig): 16 + """Initialize repository manager.""" 17 + self.git_store = git_store 18 + self.config = config 19 + 20 + def init_repository(self) -> bool: 21 + """Initialize the git repository if not already done.""" 22 + try: 23 + # GitStore.__init__ already handles repository initialization 24 + return True 25 + except Exception: 26 + return False 27 + 28 + def commit_changes(self, message: str) -> bool: 29 + """Commit all pending changes.""" 30 + try: 31 + self.git_store.commit_changes(message) 32 + return True 33 + except Exception: 34 + return False 35 + 36 + def get_status(self) -> dict: 37 + """Get repository status and statistics.""" 38 + try: 39 + stats = self.git_store.get_stats() 40 + 41 + # Add repository-specific information 42 + repo_status = { 43 + **stats, 44 + 'repository_path': str(self.config.git_store), 45 + 'cache_path': str(self.config.cache_dir), 46 + 'has_uncommitted_changes': self._has_uncommitted_changes(), 47 + 'last_commit': self._get_last_commit_info(), 48 + } 49 + 50 + return repo_status 51 + except Exception as e: 52 + return {'error': str(e)} 53 + 54 + def backup_repository(self, backup_path: Path) -> bool: 55 + """Create a backup of the repository.""" 56 + try: 57 + if backup_path.exists(): 58 + shutil.rmtree(backup_path) 59 + 60 + shutil.copytree(self.config.git_store, backup_path) 61 + return True 62 + except Exception: 63 + return False 64 + 65 + def cleanup_cache(self) -> bool: 66 + """Clean up cache directory.""" 67 + try: 68 + if self.config.cache_dir.exists(): 69 + shutil.rmtree(self.config.cache_dir) 70 + 
self.config.cache_dir.mkdir(parents=True, exist_ok=True) 71 + return True 72 + except Exception: 73 + return False 74 + 75 + def get_repository_size(self) -> dict: 76 + """Get detailed repository size information.""" 77 + try: 78 + total_size = 0 79 + file_count = 0 80 + dir_count = 0 81 + 82 + for path in self.config.git_store.rglob("*"): 83 + if path.is_file(): 84 + total_size += path.stat().st_size 85 + file_count += 1 86 + elif path.is_dir(): 87 + dir_count += 1 88 + 89 + return { 90 + 'total_size_bytes': total_size, 91 + 'total_size_mb': round(total_size / (1024 * 1024), 2), 92 + 'file_count': file_count, 93 + 'directory_count': dir_count, 94 + } 95 + except Exception as e: 96 + return {'error': str(e)} 97 + 98 + def _has_uncommitted_changes(self) -> bool: 99 + """Check if there are uncommitted changes.""" 100 + try: 101 + if not self.git_store.repo: 102 + return False 103 + return bool(self.git_store.repo.index.diff("HEAD") or self.git_store.repo.untracked_files) 104 + except Exception: 105 + return False 106 + 107 + def _get_last_commit_info(self) -> Optional[dict]: 108 + """Get information about the last commit.""" 109 + try: 110 + if not self.git_store.repo: 111 + return None 112 + 113 + last_commit = self.git_store.repo.head.commit 114 + return { 115 + 'hash': last_commit.hexsha[:8], 116 + 'message': last_commit.message.strip(), 117 + 'author': str(last_commit.author), 118 + 'date': datetime.fromtimestamp(last_commit.committed_date).isoformat(), 119 + } 120 + except Exception: 121 + return None 122 + 123 + def verify_integrity(self) -> dict: 124 + """Verify repository integrity.""" 125 + issues = [] 126 + 127 + # Check if git repository is valid 128 + try: 129 + if not self.git_store.repo: 130 + issues.append("Git repository not initialized") 131 + except Exception as e: 132 + issues.append(f"Git repository error: {e}") 133 + 134 + # Check if index.json exists and is valid 135 + index_path = self.config.git_store / "index.json" 136 + if not 
index_path.exists(): 137 + issues.append("index.json missing") 138 + else: 139 + try: 140 + self.git_store._load_index() 141 + except Exception as e: 142 + issues.append(f"index.json corrupted: {e}") 143 + 144 + # Check if duplicates.json exists 145 + duplicates_path = self.config.git_store / "duplicates.json" 146 + if not duplicates_path.exists(): 147 + issues.append("duplicates.json missing") 148 + else: 149 + try: 150 + self.git_store._load_duplicates() 151 + except Exception as e: 152 + issues.append(f"duplicates.json corrupted: {e}") 153 + 154 + return { 155 + 'is_valid': len(issues) == 0, 156 + 'issues': issues, 157 + 'checked_at': datetime.now().isoformat(), 158 + }
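`get_repository_size` above is a plain `rglob` walk tallying bytes, files, and directories. A self-contained sketch of the same walk against a throwaway temp directory (the directory layout here is illustrative, not Thicket's real repo layout):

```python
import tempfile
from pathlib import Path


def repository_size(root: Path) -> dict:
    """Walk a directory tree and tally bytes, files, and subdirectories."""
    total_size = file_count = dir_count = 0
    for path in root.rglob("*"):
        if path.is_file():
            total_size += path.stat().st_size
            file_count += 1
        elif path.is_dir():
            dir_count += 1
    return {
        "total_size_bytes": total_size,
        "total_size_mb": round(total_size / (1024 * 1024), 2),
        "file_count": file_count,
        "directory_count": dir_count,
    }


with tempfile.TemporaryDirectory() as tmp:
    root = Path(tmp)
    (root / "sub").mkdir()
    (root / "sub" / "index.json").write_bytes(b"{}")  # 2 bytes
    stats = repository_size(root)
```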
+319
src/thicket/subsystems/site.py
··· 1 + """Site generation subsystem.""" 2 + 3 + import json 4 + import shutil 5 + from datetime import datetime 6 + from pathlib import Path 7 + from typing import Optional 8 + 9 + from jinja2 import Environment, FileSystemLoader, select_autoescape 10 + 11 + from ..core.git_store import GitStore 12 + from ..models import ThicketConfig 13 + 14 + 15 + class SiteGenerator: 16 + """Generates static sites from stored entries.""" 17 + 18 + def __init__(self, git_store: GitStore, config: ThicketConfig): 19 + """Initialize site generator.""" 20 + self.git_store = git_store 21 + self.config = config 22 + self.default_template_dir = Path(__file__).parent.parent / "templates" 23 + 24 + def generate_site(self, output_dir: Path, template_dir: Optional[Path] = None) -> bool: 25 + """Generate complete static site.""" 26 + try: 27 + # Setup template environment 28 + template_dir = template_dir or self.default_template_dir 29 + if not template_dir.exists(): 30 + return False 31 + 32 + env = Environment( 33 + loader=FileSystemLoader(str(template_dir)), 34 + autoescape=select_autoescape(['html', 'xml']) 35 + ) 36 + 37 + # Prepare output directory 38 + output_dir.mkdir(parents=True, exist_ok=True) 39 + 40 + # Copy static assets 41 + self._copy_static_assets(template_dir, output_dir) 42 + 43 + # Generate pages 44 + self._generate_index_page(env, output_dir) 45 + self._generate_timeline_page(env, output_dir) 46 + self._generate_users_page(env, output_dir) 47 + self._generate_links_page(env, output_dir) 48 + self._generate_user_detail_pages(env, output_dir) 49 + 50 + return True 51 + except Exception: 52 + return False 53 + 54 + def generate_timeline(self, output_path: Path, limit: Optional[int] = None) -> bool: 55 + """Generate timeline HTML file.""" 56 + try: 57 + env = Environment( 58 + loader=FileSystemLoader(str(self.default_template_dir)), 59 + autoescape=select_autoescape(['html', 'xml']) 60 + ) 61 + 62 + timeline_data = self._get_timeline_data(limit) 63 + template = 
env.get_template('timeline.html') 64 + 65 + content = template.render(**timeline_data) 66 + 67 + output_path.parent.mkdir(parents=True, exist_ok=True) 68 + with open(output_path, 'w', encoding='utf-8') as f: 69 + f.write(content) 70 + 71 + return True 72 + except Exception: 73 + return False 74 + 75 + def generate_user_pages(self, output_dir: Path) -> bool: 76 + """Generate individual user pages.""" 77 + try: 78 + env = Environment( 79 + loader=FileSystemLoader(str(self.default_template_dir)), 80 + autoescape=select_autoescape(['html', 'xml']) 81 + ) 82 + 83 + return self._generate_user_detail_pages(env, output_dir) 84 + except Exception: 85 + return False 86 + 87 + def _copy_static_assets(self, template_dir: Path, output_dir: Path): 88 + """Copy CSS, JS, and other static assets.""" 89 + static_files = ['style.css', 'script.js'] 90 + 91 + for filename in static_files: 92 + src_file = template_dir / filename 93 + if src_file.exists(): 94 + dst_file = output_dir / filename 95 + shutil.copy2(src_file, dst_file) 96 + 97 + def _generate_index_page(self, env: Environment, output_dir: Path): 98 + """Generate main index page.""" 99 + template = env.get_template('index.html') 100 + 101 + # Get summary statistics 102 + stats = self.git_store.get_stats() 103 + index = self.git_store._load_index() 104 + 105 + # Recent entries 106 + recent_entries = [] 107 + for username in index.users.keys(): 108 + user_entries = self.git_store.list_entries(username, limit=5) 109 + for entry in user_entries: 110 + recent_entries.append({ 111 + 'username': username, 112 + 'entry': entry 113 + }) 114 + 115 + # Sort by date 116 + recent_entries.sort(key=lambda x: x['entry'].updated or x['entry'].published, reverse=True) 117 + recent_entries = recent_entries[:10] 118 + 119 + context = { 120 + 'title': 'Thicket Feed Archive', 121 + 'stats': stats, 122 + 'recent_entries': recent_entries, 123 + 'users': list(index.users.values()), 124 + 'generated_at': datetime.now().isoformat(), 125 + } 126 + 127 + 
content = template.render(**context) 128 + 129 + with open(output_dir / 'index.html', 'w', encoding='utf-8') as f: 130 + f.write(content) 131 + 132 + def _generate_timeline_page(self, env: Environment, output_dir: Path): 133 + """Generate timeline page.""" 134 + template = env.get_template('timeline.html') 135 + timeline_data = self._get_timeline_data() 136 + 137 + content = template.render(**timeline_data) 138 + 139 + with open(output_dir / 'timeline.html', 'w', encoding='utf-8') as f: 140 + f.write(content) 141 + 142 + def _generate_users_page(self, env: Environment, output_dir: Path): 143 + """Generate users overview page.""" 144 + template = env.get_template('users.html') 145 + 146 + index = self.git_store._load_index() 147 + users_data = [] 148 + 149 + for user_metadata in index.users.values(): 150 + # Get user config for additional details 151 + user_config = next( 152 + (u for u in self.config.users if u.username == user_metadata.username), 153 + None 154 + ) 155 + 156 + # Get recent entries 157 + recent_entries = self.git_store.list_entries(user_metadata.username, limit=3) 158 + 159 + users_data.append({ 160 + 'metadata': user_metadata, 161 + 'config': user_config, 162 + 'recent_entries': recent_entries, 163 + }) 164 + 165 + # Sort by entry count 166 + users_data.sort(key=lambda x: x['metadata'].entry_count, reverse=True) 167 + 168 + context = { 169 + 'title': 'Users', 170 + 'users': users_data, 171 + 'generated_at': datetime.now().isoformat(), 172 + } 173 + 174 + content = template.render(**context) 175 + 176 + with open(output_dir / 'users.html', 'w', encoding='utf-8') as f: 177 + f.write(content) 178 + 179 + def _generate_links_page(self, env: Environment, output_dir: Path): 180 + """Generate links overview page.""" 181 + template = env.get_template('links.html') 182 + 183 + # Load links data 184 + links_file = self.git_store.repo_path / "links.json" 185 + url_mapping_file = self.git_store.repo_path / "url_mapping.json" 186 + 187 + links_data = {} 188 + 
url_mapping = {} 189 + 190 + if links_file.exists(): 191 + try: 192 + with open(links_file) as f: 193 + links_data = json.load(f) 194 + except Exception: 195 + pass 196 + 197 + if url_mapping_file.exists(): 198 + try: 199 + with open(url_mapping_file) as f: 200 + url_mapping = json.load(f) 201 + except Exception: 202 + pass 203 + 204 + # Process most referenced URLs 205 + url_counts = [(url, len(refs)) for url, refs in url_mapping.items()] 206 + url_counts.sort(key=lambda x: x[1], reverse=True) 207 + most_referenced = url_counts[:20] 208 + 209 + # Count links by type 210 + link_stats = { 211 + 'total_entries_with_links': len(links_data), 212 + 'total_links': sum(len(entry_data.get('links', [])) for entry_data in links_data.values()), 213 + 'unique_urls': len(url_mapping), 214 + } 215 + 216 + context = { 217 + 'title': 'Links', 218 + 'most_referenced': most_referenced, 219 + 'url_mapping': url_mapping, 220 + 'link_stats': link_stats, 221 + 'generated_at': datetime.now().isoformat(), 222 + } 223 + 224 + content = template.render(**context) 225 + 226 + with open(output_dir / 'links.html', 'w', encoding='utf-8') as f: 227 + f.write(content) 228 + 229 + def _generate_user_detail_pages(self, env: Environment, output_dir: Path) -> bool: 230 + """Generate individual user detail pages.""" 231 + try: 232 + template = env.get_template('user_detail.html') 233 + index = self.git_store._load_index() 234 + 235 + # Create users subdirectory 236 + users_dir = output_dir / 'users' 237 + users_dir.mkdir(exist_ok=True) 238 + 239 + for user_metadata in index.users.values(): 240 + user_config = next( 241 + (u for u in self.config.users if u.username == user_metadata.username), 242 + None 243 + ) 244 + 245 + entries = self.git_store.list_entries(user_metadata.username) 246 + 247 + # Get user's links 248 + links_file = self.git_store.repo_path / "links.json" 249 + user_links = [] 250 + if links_file.exists(): 251 + try: 252 + with open(links_file) as f: 253 + all_links = json.load(f) 254 
+ user_links = [ 255 + data for key, data in all_links.items() 256 + if data.get('username') == user_metadata.username 257 + ] 258 + except Exception: 259 + pass 260 + 261 + context = { 262 + 'title': f"{user_metadata.display_name or user_metadata.username}", 263 + 'user_metadata': user_metadata, 264 + 'user_config': user_config, 265 + 'entries': entries, 266 + 'user_links': user_links, 267 + 'generated_at': datetime.now().isoformat(), 268 + } 269 + 270 + content = template.render(**context) 271 + 272 + user_file = users_dir / f"{user_metadata.username}.html" 273 + with open(user_file, 'w', encoding='utf-8') as f: 274 + f.write(content) 275 + 276 + return True 277 + except Exception: 278 + return False 279 + 280 + def _get_timeline_data(self, limit: Optional[int] = None) -> dict: 281 + """Get data for timeline page.""" 282 + index = self.git_store._load_index() 283 + 284 + # Collect all entries with metadata 285 + all_entries = [] 286 + for user_metadata in index.users.values(): 287 + user_entries = self.git_store.list_entries(user_metadata.username) 288 + for entry in user_entries: 289 + all_entries.append({ 290 + 'username': user_metadata.username, 291 + 'display_name': user_metadata.display_name, 292 + 'entry': entry, 293 + }) 294 + 295 + # Sort by date (newest first) 296 + all_entries.sort( 297 + key=lambda x: x['entry'].updated or x['entry'].published or datetime.min, 298 + reverse=True 299 + ) 300 + 301 + if limit: 302 + all_entries = all_entries[:limit] 303 + 304 + # Group by date for timeline display 305 + timeline_groups = {} 306 + for item in all_entries: 307 + entry_date = item['entry'].updated or item['entry'].published 308 + if entry_date: 309 + date_key = entry_date.strftime('%Y-%m-%d') 310 + if date_key not in timeline_groups: 311 + timeline_groups[date_key] = [] 312 + timeline_groups[date_key].append(item) 313 + 314 + return { 315 + 'title': 'Timeline', 316 + 'timeline_groups': timeline_groups, 317 + 'total_entries': len(all_entries), 318 + 
'generated_at': datetime.now().isoformat(), 319 + }
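`_get_timeline_data` above sorts all entries newest-first (preferring `updated` over `published`) and then buckets them by calendar day for the timeline template. A minimal sketch of that grouping using plain dicts instead of `AtomEntry` objects (sample data is hypothetical):

```python
from datetime import datetime


def group_timeline(entries: list[dict]) -> dict:
    """Sort entries newest-first, then bucket them by YYYY-MM-DD day key."""
    ordered = sorted(
        entries,
        key=lambda e: e.get("updated") or e.get("published") or datetime.min,
        reverse=True,
    )
    groups: dict[str, list[dict]] = {}
    for item in ordered:
        when = item.get("updated") or item.get("published")
        if when:  # undated entries are dropped, as in the subsystem
            groups.setdefault(when.strftime("%Y-%m-%d"), []).append(item)
    return groups


sample = [
    {"title": "older", "published": datetime(2024, 1, 1, 9, 0)},
    {"title": "newer", "published": datetime(2024, 1, 2, 9, 0)},
    {"title": "same-day", "updated": datetime(2024, 1, 2, 12, 0)},
]
groups = group_timeline(sample)
```

Because the entries are sorted before grouping, both the day keys and the entries within each day come out newest-first, which is the order the timeline page renders them in.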
+254
src/thicket/subsystems/users.py
··· 1 + """User management subsystem.""" 2 + 3 + import shutil 4 + from typing import Optional 5 + 6 + from pydantic import EmailStr, HttpUrl, ValidationError 7 + 8 + from ..core.git_store import GitStore 9 + from ..models import ThicketConfig, UserConfig, UserMetadata 10 + 11 + 12 + class UserManager: 13 + """Manages user operations and metadata.""" 14 + 15 + def __init__(self, git_store: GitStore, config: ThicketConfig): 16 + """Initialize user manager.""" 17 + self.git_store = git_store 18 + self.config = config 19 + 20 + def add_user(self, username: str, feeds: list[str], **kwargs) -> UserConfig: 21 + """Add a new user with feeds.""" 22 + # Validate feeds 23 + validated_feeds = [] 24 + for feed in feeds: 25 + try: 26 + validated_feeds.append(HttpUrl(feed)) 27 + except ValidationError as e: 28 + raise ValueError(f"Invalid feed URL '{feed}': {e}") 29 + 30 + # Validate optional fields 31 + email = None 32 + if 'email' in kwargs and kwargs['email']: 33 + try: 34 + email = EmailStr(kwargs['email']) 35 + except ValidationError as e: 36 + raise ValueError(f"Invalid email '{kwargs['email']}': {e}") 37 + 38 + homepage = None 39 + if 'homepage' in kwargs and kwargs['homepage']: 40 + try: 41 + homepage = HttpUrl(kwargs['homepage']) 42 + except ValidationError as e: 43 + raise ValueError(f"Invalid homepage URL '{kwargs['homepage']}': {e}") 44 + 45 + icon = None 46 + if 'icon' in kwargs and kwargs['icon']: 47 + try: 48 + icon = HttpUrl(kwargs['icon']) 49 + except ValidationError as e: 50 + raise ValueError(f"Invalid icon URL '{kwargs['icon']}': {e}") 51 + 52 + # Create user config 53 + user_config = UserConfig( 54 + username=username, 55 + feeds=validated_feeds, 56 + email=email, 57 + homepage=homepage, 58 + icon=icon, 59 + display_name=kwargs.get('display_name') 60 + ) 61 + 62 + # Add to git store 63 + self.git_store.add_user( 64 + username=username, 65 + display_name=user_config.display_name, 66 + email=str(user_config.email) if user_config.email else None, 67 + 
homepage=str(user_config.homepage) if user_config.homepage else None, 68 + icon=str(user_config.icon) if user_config.icon else None, 69 + feeds=[str(feed) for feed in user_config.feeds] 70 + ) 71 + 72 + # Add to config if not already present 73 + existing_user = next((u for u in self.config.users if u.username == username), None) 74 + if not existing_user: 75 + self.config.users.append(user_config) 76 + else: 77 + # Update existing config 78 + existing_user.feeds = user_config.feeds 79 + existing_user.email = user_config.email 80 + existing_user.homepage = user_config.homepage 81 + existing_user.icon = user_config.icon 82 + existing_user.display_name = user_config.display_name 83 + 84 + return user_config 85 + 86 + def get_user(self, username: str) -> Optional[UserConfig]: 87 + """Get user configuration.""" 88 + return next((u for u in self.config.users if u.username == username), None) 89 + 90 + def get_user_metadata(self, username: str) -> Optional[UserMetadata]: 91 + """Get user metadata from git store.""" 92 + return self.git_store.get_user(username) 93 + 94 + def list_users(self) -> list[UserConfig]: 95 + """List all configured users.""" 96 + return self.config.users.copy() 97 + 98 + def list_users_with_metadata(self) -> list[tuple[UserConfig, Optional[UserMetadata]]]: 99 + """List users with their git store metadata.""" 100 + result = [] 101 + for user_config in self.config.users: 102 + metadata = self.git_store.get_user(user_config.username) 103 + result.append((user_config, metadata)) 104 + return result 105 + 106 + def update_user(self, username: str, **kwargs) -> bool: 107 + """Update user configuration.""" 108 + # Update in config 109 + user_config = self.get_user(username) 110 + if not user_config: 111 + return False 112 + 113 + # Validate and update feeds if provided 114 + if 'feeds' in kwargs: 115 + validated_feeds = [] 116 + for feed in kwargs['feeds']: 117 + try: 118 + validated_feeds.append(HttpUrl(feed)) 119 + except ValidationError: 120 + return 
False 121 + user_config.feeds = validated_feeds 122 + 123 + # Validate and update other fields 124 + if 'email' in kwargs and kwargs['email']: 125 + try: 126 + user_config.email = EmailStr(kwargs['email']) 127 + except ValidationError: 128 + return False 129 + elif 'email' in kwargs and not kwargs['email']: 130 + user_config.email = None 131 + 132 + if 'homepage' in kwargs and kwargs['homepage']: 133 + try: 134 + user_config.homepage = HttpUrl(kwargs['homepage']) 135 + except ValidationError: 136 + return False 137 + elif 'homepage' in kwargs and not kwargs['homepage']: 138 + user_config.homepage = None 139 + 140 + if 'icon' in kwargs and kwargs['icon']: 141 + try: 142 + user_config.icon = HttpUrl(kwargs['icon']) 143 + except ValidationError: 144 + return False 145 + elif 'icon' in kwargs and not kwargs['icon']: 146 + user_config.icon = None 147 + 148 + if 'display_name' in kwargs: 149 + user_config.display_name = kwargs['display_name'] or None 150 + 151 + # Update in git store 152 + git_kwargs = {} 153 + if 'feeds' in kwargs: 154 + git_kwargs['feeds'] = [str(feed) for feed in user_config.feeds] 155 + if user_config.email: 156 + git_kwargs['email'] = str(user_config.email) 157 + if user_config.homepage: 158 + git_kwargs['homepage'] = str(user_config.homepage) 159 + if user_config.icon: 160 + git_kwargs['icon'] = str(user_config.icon) 161 + if user_config.display_name: 162 + git_kwargs['display_name'] = user_config.display_name 163 + 164 + return self.git_store.update_user(username, **git_kwargs) 165 + 166 + def remove_user(self, username: str) -> bool: 167 + """Remove a user and their data.""" 168 + # Remove from config 169 + self.config.users = [u for u in self.config.users if u.username != username] 170 + 171 + # Remove user directory from git store 172 + user_metadata = self.git_store.get_user(username) 173 + if user_metadata: 174 + user_dir = self.git_store.repo_path / user_metadata.directory 175 + if user_dir.exists(): 176 + try: 177 + shutil.rmtree(user_dir) 
178 + except Exception: 179 + return False 180 + 181 + # Remove user from index 182 + index = self.git_store._load_index() 183 + if username in index.users: 184 + del index.users[username] 185 + self.git_store._save_index(index) 186 + 187 + return True 188 + 189 + def get_user_stats(self, username: str) -> Optional[dict]: 190 + """Get statistics for a specific user.""" 191 + user_metadata = self.git_store.get_user(username) 192 + if not user_metadata: 193 + return None 194 + 195 + user_config = self.get_user(username) 196 + entries = self.git_store.list_entries(username) 197 + 198 + return { 199 + 'username': username, 200 + 'display_name': user_metadata.display_name, 201 + 'entry_count': user_metadata.entry_count, 202 + 'feeds_configured': len(user_config.feeds) if user_config else 0, 203 + 'directory': user_metadata.directory, 204 + 'created': user_metadata.created.isoformat() if user_metadata.created else None, 205 + 'last_updated': user_metadata.last_updated.isoformat() if user_metadata.last_updated else None, 206 + 'latest_entry': entries[0].updated.isoformat() if entries else None, 207 + } 208 + 209 + def validate_user_feeds(self, username: str) -> dict: 210 + """Validate all feeds for a user.""" 211 + user_config = self.get_user(username) 212 + if not user_config: 213 + return {'error': 'User not found'} 214 + 215 + results = { 216 + 'username': username, 217 + 'total_feeds': len(user_config.feeds), 218 + 'valid_feeds': [], 219 + 'invalid_feeds': [], 220 + } 221 + 222 + for feed_url in user_config.feeds: 223 + try: 224 + # Basic URL validation - more comprehensive validation would require fetching 225 + HttpUrl(str(feed_url)) 226 + results['valid_feeds'].append(str(feed_url)) 227 + except ValidationError as e: 228 + results['invalid_feeds'].append({ 229 + 'url': str(feed_url), 230 + 'error': str(e) 231 + }) 232 + 233 + results['is_valid'] = len(results['invalid_feeds']) == 0 234 + 235 + return results 236 + 237 + def sync_config_with_git_store(self) -> bool: 
238 + """Sync configuration users with git store.""" 239 + try: 240 + for user_config in self.config.users: 241 + git_user = self.git_store.get_user(user_config.username) 242 + if not git_user: 243 + # Add missing user to git store 244 + self.git_store.add_user( 245 + username=user_config.username, 246 + display_name=user_config.display_name, 247 + email=str(user_config.email) if user_config.email else None, 248 + homepage=str(user_config.homepage) if user_config.homepage else None, 249 + icon=str(user_config.icon) if user_config.icon else None, 250 + feeds=[str(feed) for feed in user_config.feeds] 251 + ) 252 + return True 253 + except Exception: 254 + return False
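`validate_user_feeds` above relies on pydantic's `HttpUrl` for per-feed validation and returns a valid/invalid report. A dependency-free sketch of the same report shape using only a scheme-and-host check from the standard library (pydantic's `HttpUrl` is stricter; this is an approximation, and the sample URLs are hypothetical):

```python
from urllib.parse import urlparse


def validate_feeds(feeds: list[str]) -> dict:
    """Split feed URLs into valid/invalid using a basic scheme+netloc check."""
    results: dict = {"valid_feeds": [], "invalid_feeds": []}
    for url in feeds:
        parsed = urlparse(url)
        if parsed.scheme in ("http", "https") and parsed.netloc:
            results["valid_feeds"].append(url)
        else:
            results["invalid_feeds"].append(
                {"url": url, "error": "not an http(s) URL"}
            )
    results["is_valid"] = not results["invalid_feeds"]
    return results


report = validate_feeds(["https://blog.example/feed.xml", "not-a-url"])
```

As the subsystem's own comment notes, this only checks URL shape; confirming a feed actually serves parseable Atom/RSS would require fetching it.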
+169
src/thicket/templates/user_detail.html
··· 1 + {% extends "base.html" %} 2 + 3 + {% block title %}{{ title }} - Thicket{% endblock %} 4 + 5 + {% block content %} 6 + <div class="container mx-auto px-4 py-8"> 7 + <div class="max-w-4xl mx-auto"> 8 + <!-- User Header --> 9 + <div class="bg-white rounded-lg shadow-md p-6 mb-6"> 10 + <div class="flex items-center space-x-4"> 11 + {% if user_config and user_config.icon %} 12 + <img src="{{ user_config.icon }}" alt="{{ title }}" class="w-16 h-16 rounded-full"> 13 + {% else %} 14 + <div class="w-16 h-16 rounded-full bg-blue-500 flex items-center justify-center text-white text-xl font-bold"> 15 + {{ user_metadata.username[0].upper() }} 16 + </div> 17 + {% endif %} 18 + 19 + <div> 20 + <h1 class="text-2xl font-bold text-gray-900">{{ title }}</h1> 21 + <p class="text-gray-600">@{{ user_metadata.username }}</p> 22 + {% if user_config and user_config.email %} 23 + <p class="text-sm text-gray-500">{{ user_config.email }}</p> 24 + {% endif %} 25 + </div> 26 + </div> 27 + 28 + {% if user_config and user_config.homepage %} 29 + <div class="mt-4"> 30 + <a href="{{ user_config.homepage }}" class="text-blue-600 hover:text-blue-800" target="_blank"> 31 + 🏠 Homepage 32 + </a> 33 + </div> 34 + {% endif %} 35 + 36 + <div class="mt-4 grid grid-cols-2 md:grid-cols-4 gap-4"> 37 + <div class="text-center"> 38 + <div class="text-2xl font-bold text-blue-600">{{ user_metadata.entry_count }}</div> 39 + <div class="text-sm text-gray-500">Entries</div> 40 + </div> 41 + 42 + {% if user_config %} 43 + <div class="text-center"> 44 + <div class="text-2xl font-bold text-green-600">{{ user_config.feeds|length }}</div> 45 + <div class="text-sm text-gray-500">Feeds</div> 46 + </div> 47 + {% endif %} 48 + 49 + <div class="text-center"> 50 + <div class="text-2xl font-bold text-purple-600">{{ user_links|length }}</div> 51 + <div class="text-sm text-gray-500">Link Groups</div> 52 + </div> 53 + 54 + <div class="text-center"> 55 + <div class="text-sm text-gray-500">Member since</div> 56 + <div 
class="text-sm font-medium">{{ user_metadata.created.strftime('%Y-%m-%d') if user_metadata.created else 'Unknown' }}</div> 57 + </div> 58 + </div> 59 + </div> 60 + 61 + <!-- Feeds --> 62 + {% if user_config and user_config.feeds %} 63 + <div class="bg-white rounded-lg shadow-md p-6 mb-6"> 64 + <h2 class="text-xl font-semibold mb-4">Feeds</h2> 65 + <div class="space-y-2"> 66 + {% for feed in user_config.feeds %} 67 + <div class="flex items-center space-x-2"> 68 + <span class="text-green-500">📡</span> 69 + <a href="{{ feed }}" class="text-blue-600 hover:text-blue-800" target="_blank">{{ feed }}</a> 70 + </div> 71 + {% endfor %} 72 + </div> 73 + </div> 74 + {% endif %} 75 + 76 + <!-- Recent Entries --> 77 + <div class="bg-white rounded-lg shadow-md p-6 mb-6"> 78 + <h2 class="text-xl font-semibold mb-4">Recent Entries</h2> 79 + 80 + {% if entries %} 81 + <div class="space-y-4"> 82 + {% for entry in entries[:10] %} 83 + <div class="border-l-4 border-blue-500 pl-4 py-2"> 84 + <h3 class="font-semibold text-lg"> 85 + <a href="{{ entry.link }}" class="text-blue-600 hover:text-blue-800" target="_blank"> 86 + {{ entry.title }} 87 + </a> 88 + </h3> 89 + 90 + <div class="text-sm text-gray-500 mb-2"> 91 + {% if entry.published %} 92 + Published: {{ entry.published.strftime('%Y-%m-%d %H:%M') }} 93 + {% endif %} 94 + {% if entry.updated and entry.updated != entry.published %} 95 + • Updated: {{ entry.updated.strftime('%Y-%m-%d %H:%M') }} 96 + {% endif %} 97 + </div> 98 + 99 + {% if entry.summary %} 100 + <div class="text-gray-700 mb-2"> 101 + {{ entry.summary|truncate(200) }} 102 + </div> 103 + {% endif %} 104 + 105 + {% if entry.categories %} 106 + <div class="flex flex-wrap gap-1"> 107 + {% for category in entry.categories %} 108 + <span class="px-2 py-1 bg-blue-100 text-blue-800 text-xs rounded">{{ category }}</span> 109 + {% endfor %} 110 + </div> 111 + {% endif %} 112 + </div> 113 + {% endfor %} 114 + </div> 115 + 116 + {% if entries|length > 10 %} 117 + <div class="mt-4 
text-center"> 118 + <p class="text-gray-500">Showing 10 of {{ entries|length }} entries</p> 119 + </div> 120 + {% endif %} 121 + 122 + {% else %} 123 + <p class="text-gray-500">No entries found.</p> 124 + {% endif %} 125 + </div> 126 + 127 + <!-- Links Summary --> 128 + {% if user_links %} 129 + <div class="bg-white rounded-lg shadow-md p-6"> 130 + <h2 class="text-xl font-semibold mb-4">Link Activity</h2> 131 + 132 + <div class="space-y-3"> 133 + {% for link_group in user_links[:5] %} 134 + <div class="border-l-4 border-green-500 pl-4"> 135 + <h3 class="font-medium">{{ link_group.title }}</h3> 136 + <div class="text-sm text-gray-500 mb-2"> 137 + {{ link_group.links|length }} link(s) found 138 + </div> 139 + 140 + <div class="space-y-1"> 141 + {% for link in link_group.links[:3] %} 142 + <div class="text-sm"> 143 + <a href="{{ link.url }}" class="text-blue-600 hover:text-blue-800" target="_blank"> 144 + {{ link.text or link.url }} 145 + </a> 146 + <span class="text-gray-400 ml-2">({{ link.type }})</span> 147 + </div> 148 + {% endfor %} 149 + 150 + {% if link_group.links|length > 3 %} 151 + <div class="text-sm text-gray-500"> 152 + ... and {{ link_group.links|length - 3 }} more 153 + </div> 154 + {% endif %} 155 + </div> 156 + </div> 157 + {% endfor %} 158 + </div> 159 + 160 + {% if user_links|length > 5 %} 161 + <div class="mt-4 text-center"> 162 + <p class="text-gray-500">Showing 5 of {{ user_links|length }} entries with links</p> 163 + </div> 164 + {% endif %} 165 + </div> 166 + {% endif %} 167 + </div> 168 + </div> 169 + {% endblock %}
+230
src/thicket/thicket.py
···
1 + """Main Thicket library class providing unified API."""
2 +
3 + import asyncio
4 + from datetime import datetime
5 + from pathlib import Path
6 + from typing import Optional, Union
7 +
8 + from pydantic import HttpUrl
9 +
10 + from .core.feed_parser import FeedParser
11 + from .core.git_store import GitStore
12 + from .models import AtomEntry, ThicketConfig, UserConfig
13 + from .subsystems.feeds import FeedManager
14 + from .subsystems.links import LinkProcessor
15 + from .subsystems.repository import RepositoryManager
16 + from .subsystems.site import SiteGenerator
17 + from .subsystems.users import UserManager
18 +
19 +
20 + class Thicket:
21 +     """
22 +     Main Thicket class providing unified API for feed management.
23 +
24 +     This class serves as the primary interface for all Thicket operations,
25 +     consolidating configuration, repository management, feed processing,
26 +     user management, link processing, and site generation.
27 +     """
28 +
29 +     def __init__(self, config: Union[ThicketConfig, Path, str]):
30 +         """
31 +         Initialize Thicket with configuration.
32 +
33 +         Args:
34 +             config: Either a ThicketConfig object, or path to config file
35 +         """
36 +         if isinstance(config, (Path, str)):
37 +             self.config = ThicketConfig.from_file(Path(config))
38 +         else:
39 +             self.config = config
40 +
41 +         # Initialize subsystems
42 +         self._init_subsystems()
43 +
44 +     def _init_subsystems(self):
45 +         """Initialize all subsystems."""
46 +         # Core components
47 +         self.git_store = GitStore(self.config.git_store)
48 +         self.feed_parser = FeedParser()
49 +
50 +         # Subsystem managers
51 +         self.repository = RepositoryManager(self.git_store, self.config)
52 +         self.users = UserManager(self.git_store, self.config)
53 +         self.feeds = FeedManager(self.git_store, self.feed_parser, self.config)
54 +         self.links = LinkProcessor(self.git_store, self.config)
55 +         self.site = SiteGenerator(self.git_store, self.config)
56 +
57 +     @classmethod
58 +     def create(cls, git_store: Path, cache_dir: Path, users: Optional[list[UserConfig]] = None) -> 'Thicket':
59 +         """
60 +         Create a new Thicket instance with minimal configuration.
61 +
62 +         Args:
63 +             git_store: Path to git repository
64 +             cache_dir: Path to cache directory
65 +             users: Optional list of user configurations
66 +
67 +         Returns:
68 +             Configured Thicket instance
69 +         """
70 +         config = ThicketConfig(
71 +             git_store=git_store,
72 +             cache_dir=cache_dir,
73 +             users=users or []
74 +         )
75 +         return cls(config)
76 +
77 +     @classmethod
78 +     def from_config_file(cls, config_path: Path) -> 'Thicket':
79 +         """Load Thicket from configuration file."""
80 +         return cls(config_path)
81 +
82 +     # User Management API
83 +     def add_user(self, username: str, feeds: list[str], **kwargs) -> UserConfig:
84 +         """Add a new user with feeds."""
85 +         return self.users.add_user(username, feeds, **kwargs)
86 +
87 +     def get_user(self, username: str) -> Optional[UserConfig]:
88 +         """Get user configuration."""
89 +         return self.users.get_user(username)
90 +
91 +     def list_users(self) -> list[UserConfig]:
92 +         """List all configured users."""
93 +         return self.users.list_users()
94 +
95 +     def update_user(self, username: str, **kwargs) -> bool:
96 +         """Update user configuration."""
97 +         return self.users.update_user(username, **kwargs)
98 +
99 +     def remove_user(self, username: str) -> bool:
100 +         """Remove a user and their data."""
101 +         return self.users.remove_user(username)
102 +
103 +     # Feed Management API
104 +     async def sync_feeds(self, username: Optional[str] = None, progress_callback=None) -> dict:
105 +         """Sync feeds for user(s)."""
106 +         return await self.feeds.sync_feeds(username, progress_callback)
107 +
108 +     async def sync_user_feeds(self, username: str, progress_callback=None) -> dict:
109 +         """Sync feeds for a specific user."""
110 +         return await self.feeds.sync_user_feeds(username, progress_callback)
111 +
112 +     def get_entries(self, username: str, limit: Optional[int] = None) -> list[AtomEntry]:
113 +         """Get entries for a user."""
114 +         return self.feeds.get_entries(username, limit)
115 +
116 +     def get_entry(self, username: str, entry_id: str) -> Optional[AtomEntry]:
117 +         """Get a specific entry."""
118 +         return self.feeds.get_entry(username, entry_id)
119 +
120 +     def search_entries(self, query: str, username: Optional[str] = None, limit: Optional[int] = None) -> list[tuple[str, AtomEntry]]:
121 +         """Search entries across users."""
122 +         return self.feeds.search_entries(query, username, limit)
123 +
124 +     # Repository Management API
125 +     def init_repository(self) -> bool:
126 +         """Initialize the git repository."""
127 +         return self.repository.init_repository()
128 +
129 +     def commit_changes(self, message: str) -> bool:
130 +         """Commit all pending changes."""
131 +         return self.repository.commit_changes(message)
132 +
133 +     def get_status(self) -> dict:
134 +         """Get repository status and statistics."""
135 +         return self.repository.get_status()
136 +
137 +     def backup_repository(self, backup_path: Path) -> bool:
138 +         """Create a backup of the repository."""
139 +         return self.repository.backup_repository(backup_path)
140 +
141 +     # Link Processing API
142 +     def process_links(self, username: Optional[str] = None) -> dict:
143 +         """Process and extract links from entries."""
144 +         return self.links.process_links(username)
145 +
146 +     def get_links(self, username: Optional[str] = None) -> dict:
147 +         """Get processed links."""
148 +         return self.links.get_links(username)
149 +
150 +     def find_references(self, url: str) -> list[tuple[str, AtomEntry]]:
151 +         """Find entries that reference a URL."""
152 +         return self.links.find_references(url)
153 +
154 +     # Site Generation API
155 +     def generate_site(self, output_dir: Path, template_dir: Optional[Path] = None) -> bool:
156 +         """Generate static site."""
157 +         return self.site.generate_site(output_dir, template_dir)
158 +
159 +     def generate_timeline(self, output_path: Path, limit: Optional[int] = None) -> bool:
160 +         """Generate timeline HTML."""
161 +         return self.site.generate_timeline(output_path, limit)
162 +
163 +     def generate_user_pages(self, output_dir: Path) -> bool:
164 +         """Generate individual user pages."""
165 +         return self.site.generate_user_pages(output_dir)
166 +
167 +     # Utility Methods
168 +     def get_stats(self) -> dict:
169 +         """Get comprehensive statistics."""
170 +         base_stats = self.repository.get_status()
171 +         feed_stats = self.feeds.get_stats()
172 +         link_stats = self.links.get_stats()
173 +
174 +         return {
175 +             **base_stats,
176 +             **feed_stats,
177 +             **link_stats,
178 +             'config': {
179 +                 'git_store': str(self.config.git_store),
180 +                 'cache_dir': str(self.config.cache_dir),
181 +                 'total_users_configured': len(self.config.users),
182 +             }
183 +         }
184 +
185 +     async def full_sync(self, progress_callback=None) -> dict:
186 +         """Perform a complete sync: feeds -> links -> commit."""
187 +         results = {}
188 +
189 +         # Sync feeds
190 +         results['feeds'] = await self.sync_feeds(progress_callback=progress_callback)
191 +
192 +         # Process links
193 +         results['links'] = self.process_links()
194 +
195 +         # Commit changes
196 +         message = f"Sync completed at {datetime.now().isoformat()}"
197 +         results['committed'] = self.commit_changes(message)
198 +
199 +         return results
200 +
201 +     def validate_config(self) -> list[str]:
202 +         """Validate configuration and return any errors."""
203 +         errors = []
204 +
205 +         # Check paths exist
206 +         if not self.config.git_store.parent.exists():
207 +             errors.append(f"Git store parent directory does not exist: {self.config.git_store.parent}")
208 +
209 +         if not self.config.cache_dir.parent.exists():
210 +             errors.append(f"Cache directory parent does not exist: {self.config.cache_dir.parent}")
211 +
212 +         # Validate user configs
213 +         for user in self.config.users:
214 +             if not user.feeds:
215 +                 errors.append(f"User {user.username} has no feeds configured")
216 +
217 +             for feed_url in user.feeds:
218 +                 # Basic URL validation is handled by pydantic
219 +                 pass
220 +
221 +         return errors
222 +
223 +     def __enter__(self):
224 +         """Context manager entry."""
225 +         return self
226 +
227 +     def __exit__(self, exc_type, exc_val, exc_tb):
228 +         """Context manager exit."""
229 +         # Could add cleanup logic here if needed
230 +         pass
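The `Thicket` class above is a facade: it constructs the subsystem managers once and exposes thin delegation methods so callers (including the CLI) touch a single public surface. A minimal, self-contained sketch of that layout — the `UserManager` and `FeedManager` stand-ins here are illustrative only, not the real thicket subsystem modules:

```python
"""Sketch of the facade/delegation layout, with hypothetical stand-in subsystems."""
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class UserManager:
    """Stand-in subsystem: owns user records."""
    users: dict = field(default_factory=dict)

    def add_user(self, username: str, feeds: list) -> None:
        self.users[username] = list(feeds)

    def list_users(self) -> list:
        return sorted(self.users)


@dataclass
class FeedManager:
    """Stand-in subsystem: depends on UserManager, as the real managers depend on GitStore."""
    user_manager: UserManager

    def sync_feeds(self, username: Optional[str] = None) -> dict:
        # Toy behavior: report one "entry" per configured feed.
        targets = [username] if username else self.user_manager.list_users()
        return {u: len(self.user_manager.users.get(u, [])) for u in targets}


class Thicket:
    """Facade: wires up subsystems once, then delegates."""

    def __init__(self) -> None:
        self.users = UserManager()
        self.feeds = FeedManager(self.users)

    # Thin delegation keeps the public API in one place,
    # mirroring Thicket.add_user / Thicket.sync_feeds in the diff.
    def add_user(self, username: str, feeds: list) -> None:
        self.users.add_user(username, feeds)

    def sync_feeds(self, username: Optional[str] = None) -> dict:
        return self.feeds.sync_feeds(username)


t = Thicket()
t.add_user("alice", ["https://example.com/feed.xml"])
t.add_user("bob", ["https://example.org/a.xml", "https://example.org/b.xml"])
print(t.sync_feeds())       # → {'alice': 1, 'bob': 2}
print(t.sync_feeds("bob"))  # → {'bob': 2}
```

This is the shape that lets the CLI commands drop direct `core` imports: they hold one `Thicket` instance and call its methods, while the subsystem wiring stays private to `_init_subsystems`.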