this repo has no description
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Unify links, references, and url_mapping JSON files into single structure

- Consolidate references.json, url_mapping.json, and links.json into unified links.json
- Eliminate duplication by storing each URL only once with metadata flags
- Add is_tracked_post flag and target_username to distinguish internal/external links
- Update all commands (links, index, threads, info) to use unified structure
- Maintain all existing functionality while reducing file I/O and complexity
- Update documentation and help text to reflect changes

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

+127 -86
+28 -25
ARCH.md
··· 157 157 git-store/ 158 158 ├── index.json # User directory index 159 159 ├── duplicates.json # Manual curation of duplicate entries 160 - ├── links.json # All outbound links categorized by type 161 - ├── references.json # Cross-reference index for threading 160 + ├── links.json # Unified links, references, and mapping data 162 161 ├── user1/ 163 162 │ ├── entry_id_1.json # Sanitized entry files 164 163 │ ├── entry_id_2.json ··· 403 402 404 403 ### Data Structures 405 404 406 - #### links.json Format 405 + #### links.json Format (Unified Structure) 407 406 ```json 408 407 { 409 - "links": [ 410 - { 411 - "url": "https://example.com/post/123", 412 - "entry_id": "https://blog.user.com/entry/456", 413 - "username": "user1", 414 - "context": "As mentioned in this post...", 415 - "category": "user", 408 + "links": { 409 + "https://example.com/post/123": { 410 + "referencing_entries": ["https://blog.user.com/entry/456"], 411 + "is_tracked_post": true, 416 412 "target_username": "user2" 413 + }, 414 + "https://external-site.com/article": { 415 + "referencing_entries": ["https://blog.user.com/entry/789"], 416 + "is_tracked_post": false 417 417 } 418 - ], 419 - "categories": { 420 - "internal": 1234, 421 - "user": 456, 422 - "unknown": 7890 423 418 }, 424 - "user_domains": { 425 - "user1": ["blog.user.com", "user.com"], 426 - "user2": ["example.com"] 427 - } 428 - } 429 - ``` 430 - 431 - #### references.json Format 432 - ```json 433 - { 419 + "reverse_mapping": { 420 + "https://blog.user.com/entry/456": ["https://example.com/post/123"], 421 + "https://blog.user.com/entry/789": ["https://external-site.com/article"] 422 + }, 434 423 "references": [ 435 424 { 436 425 "source_entry_id": "https://blog.user.com/entry/456", ··· 447 436 } 448 437 } 449 438 ``` 439 + 440 + This unified structure eliminates duplication by: 441 + - Storing each URL only once with metadata flags 442 + - Including all link data, reference data, and 
mappings in one file 443 + - Using `is_tracked_post` to identify internal vs external links 444 + - Providing bidirectional mappings for efficient queries 445 + 446 + ### Unified Structure Benefits 447 + 448 + - **Eliminates Duplication**: Each URL appears only once with metadata 449 + - **Single Source of Truth**: All link-related data in one file 450 + - **Efficient Queries**: Fast lookups for both directions (URL→entries, entry→URLs) 451 + - **Atomic Updates**: All link data changes together 452 + - **Reduced I/O**: Fewer file operations 450 453 451 454 ### Implementation Benefits 452 455
+42 -11
src/thicket/cli/commands/index_cmd.py
··· 35 35 None, 36 36 "--output", 37 37 "-o", 38 - help="Path to output index file (default: references.json in git store)", 38 + help="Path to output index file (default: updates links.json in git store)", 39 39 ), 40 40 verbose: bool = typer.Option( 41 41 False, ··· 49 49 This command analyzes all blog entries to detect cross-references between 50 50 different blogs, creating an index that can be used to build threaded 51 51 views of related content. 52 + 53 + Updates the unified links.json file with reference data. 52 54 """ 53 55 try: 54 56 # Load configuration ··· 154 156 if output_file: 155 157 output_path = output_file 156 158 else: 157 - output_path = config.git_store / "references.json" 159 + output_path = config.git_store / "links.json" 160 + 161 + # Load existing links data or create new structure 162 + if output_path.exists() and not output_file: 163 + # Load existing unified structure 164 + with open(output_path) as f: 165 + existing_data = json.load(f) 166 + else: 167 + # Create new structure 168 + existing_data = { 169 + "links": {}, 170 + "reverse_mapping": {}, 171 + "user_domains": {} 172 + } 173 + 174 + # Update with reference data 175 + existing_data["references"] = ref_index.to_dict()["references"] 176 + existing_data["user_domains"] = {k: list(v) for k, v in user_domains.items()} 158 177 159 - # Save reference index 178 + # Save updated structure 160 179 with open(output_path, "w") as f: 161 - json.dump(ref_index.to_dict(), f, indent=2, default=str) 180 + json.dump(existing_data, f, indent=2, default=str) 162 181 163 182 # Show summary 164 183 if not get_tsv_mode(): ··· 248 267 None, 249 268 "--index", 250 269 "-i", 251 - help="Path to reference index file (default: references.json in git store)", 270 + help="Path to reference index file (default: links.json in git store)", 252 271 ), 253 272 username: Optional[str] = typer.Option( 254 273 None, ··· 274 293 This command uses the reference index to show which blog entries 275 294 are connected 
through cross-references, creating an email-style 276 295 threaded view of the conversation. 296 + 297 + Reads reference data from the unified links.json file. 277 298 """ 278 299 try: 279 300 # Load configuration ··· 283 304 if index_file: 284 305 index_path = index_file 285 306 else: 286 - index_path = config.git_store / "references.json" 307 + index_path = config.git_store / "links.json" 287 308 288 309 if not index_path.exists(): 289 - console.print(f"[red]Reference index not found: {index_path}[/red]") 290 - console.print("Run 'thicket index' first to build the reference index") 310 + console.print(f"[red]Links file not found: {index_path}[/red]") 311 + console.print("Run 'thicket links' and 'thicket index' first to build the reference index") 291 312 raise typer.Exit(1) 292 313 293 - # Load reference index 314 + # Load unified data 294 315 with open(index_path) as f: 295 - index_data = json.load(f) 316 + unified_data = json.load(f) 317 + 318 + # Check if references exist in the unified structure 319 + if "references" not in unified_data: 320 + console.print(f"[red]No references found in {index_path}[/red]") 321 + console.print("Run 'thicket index' first to build the reference index") 322 + raise typer.Exit(1) 296 323 297 - ref_index = ReferenceIndex.from_dict(index_data) 324 + # Extract reference data and reconstruct ReferenceIndex 325 + ref_index = ReferenceIndex.from_dict({ 326 + "references": unified_data["references"], 327 + "user_domains": unified_data.get("user_domains", {}) 328 + }) 298 329 299 330 # Initialize Git store to get entry details 300 331 git_store = GitStore(config.git_store)
+12 -6
src/thicket/cli/commands/info_cmd.py
··· 106 106 raise typer.Exit(1) 107 107 108 108 # Load reference index if available 109 - references_path = config.git_store / "references.json" 109 + links_path = config.git_store / "links.json" 110 110 ref_index = None 111 - if references_path.exists(): 112 - with open(references_path) as f: 113 - index_data = json.load(f) 114 - ref_index = ReferenceIndex.from_dict(index_data) 111 + if links_path.exists(): 112 + with open(links_path) as f: 113 + unified_data = json.load(f) 114 + 115 + # Check if references exist in the unified structure 116 + if "references" in unified_data: 117 + ref_index = ReferenceIndex.from_dict({ 118 + "references": unified_data["references"], 119 + "user_domains": unified_data.get("user_domains", {}) 120 + }) 115 121 116 122 # Display information 117 123 if get_tsv_mode(): ··· 122 128 if ref_index: 123 129 _display_link_info(entry, found_username, ref_index) 124 130 else: 125 - console.print("\n[yellow]No reference index found. Run 'thicket index' to build cross-reference data.[/yellow]") 131 + console.print("\n[yellow]No reference index found. Run 'thicket links' and 'thicket index' to build cross-reference data.[/yellow]") 126 132 127 133 # Optionally display content 128 134 if show_content and entry.content: