Command-line tool for managing your AT Protocol bookmarks. Works with kipclip.com and any app that uses the same record format. kipclip.com
atproto rust kipclip bookmarks tags toread atprotocol
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Move URL enrichment client-side

Fetch and parse HTML metadata directly in the CLI instead of calling
the kipclip.com appview. Extracts title, description, favicon, and
og:image using regex-lite. Makes the CLI fully independent: it no longer
relies on kipclip.com, only on the user's own PDS.

+170 -16
+1
Cargo.lock
··· 2124 2124 "miette", 2125 2125 "open", 2126 2126 "owo-colors", 2127 + "regex-lite", 2127 2128 "reqwest", 2128 2129 "serde", 2129 2130 "serde_json",
+1
Cargo.toml
··· 25 25 unicode-width = "0.2" 26 26 terminal_size = "0.4" 27 27 jacquard-identity = "0.9" 28 + regex-lite = "0.1"
-5
src/kipclip/config.rs
··· 21 21 pub fn session_info_path() -> PathBuf { 22 22 config_dir().join("whoami.json") 23 23 } 24 - 25 - /// Appview base URL 26 - pub fn appview_url() -> String { 27 - std::env::var("KIPCLIP_APPVIEW_URL").unwrap_or_else(|_| "https://kipclip.com".to_string()) 28 - }
+168 -11
src/kipclip/enrich.rs
··· 1 1 use miette::{IntoDiagnostic, Result, miette}; 2 + use reqwest::header; 2 3 3 - use crate::kipclip::config; 4 4 use crate::kipclip::types::UrlMetadata; 5 5 6 - /// Call the kipclip appview /api/enrich endpoint 6 + const MAX_TITLE_LENGTH: usize = 200; 7 + const MAX_DESCRIPTION_LENGTH: usize = 500; 8 + const MAX_URL_LENGTH: usize = 2000; 9 + const TIMEOUT_SECS: u64 = 10; 10 + 11 + /// Fetch a URL and extract metadata (title, description, favicon, og:image) 7 12 pub async fn enrich_url(url: &str) -> Result<UrlMetadata> { 8 - let appview = config::appview_url(); 9 - let client = reqwest::Client::new(); 13 + let parsed = reqwest::Url::parse(url).map_err(|e| miette!("Invalid URL: {e}"))?; 14 + 15 + if !matches!(parsed.scheme(), "http" | "https") { 16 + return Err(miette!("Only HTTP(S) URLs are supported")); 17 + } 18 + 19 + let client = reqwest::Client::builder() 20 + .timeout(std::time::Duration::from_secs(TIMEOUT_SECS)) 21 + .build() 22 + .into_diagnostic()?; 23 + 10 24 let resp = client 11 - .post(format!("{appview}/api/enrich")) 12 - .json(&serde_json::json!({ "url": url })) 25 + .get(url) 26 + .header( 27 + header::USER_AGENT, 28 + "kipclip-bot/1.0 (Bookmark enrichment; +https://kipclip.com)", 29 + ) 13 30 .send() 14 31 .await 15 - .into_diagnostic()?; 32 + .map_err(|e| miette!("Failed to fetch URL: {e}"))?; 16 33 17 34 if !resp.status().is_success() { 18 - let status = resp.status(); 19 - let body = resp.text().await.unwrap_or_default(); 20 - return Err(miette!("Enrichment failed ({status}): {body}")); 35 + let hostname = parsed.host_str().unwrap_or(url); 36 + return Ok(UrlMetadata { 37 + title: Some(hostname.to_string()), 38 + description: None, 39 + favicon: Some(default_favicon(&parsed)), 40 + image: None, 41 + }); 42 + } 43 + 44 + let content_type = resp 45 + .headers() 46 + .get(header::CONTENT_TYPE) 47 + .and_then(|v| v.to_str().ok()) 48 + .unwrap_or(""); 49 + 50 + if !content_type.contains("text/html") { 51 + let hostname = 
parsed.host_str().unwrap_or(url); 52 + return Ok(UrlMetadata { 53 + title: Some(hostname.to_string()), 54 + description: None, 55 + favicon: Some(default_favicon(&parsed)), 56 + image: None, 57 + }); 58 + } 59 + 60 + let html = resp.text().await.into_diagnostic()?; 61 + Ok(parse_html_metadata(&html, &parsed)) 62 + } 63 + 64 + fn default_favicon(url: &reqwest::Url) -> String { 65 + format!("{}/favicon.ico", url.origin().ascii_serialization()) 66 + } 67 + 68 + fn sanitize_text(text: &str, max_len: usize) -> String { 69 + text.trim() 70 + .replace(|c: char| c.is_control(), "") 71 + .split_whitespace() 72 + .collect::<Vec<_>>() 73 + .join(" ") 74 + .chars() 75 + .take(max_len) 76 + .collect() 77 + } 78 + 79 + fn resolve_url(href: &str, base: &reqwest::Url) -> Option<String> { 80 + let resolved = base.join(href).ok()?; 81 + if !matches!(resolved.scheme(), "http" | "https") { 82 + return None; 83 + } 84 + let s = resolved.to_string(); 85 + if s.len() > MAX_URL_LENGTH { 86 + return None; 87 + } 88 + Some(s) 89 + } 90 + 91 + fn parse_html_metadata(html: &str, url: &reqwest::Url) -> UrlMetadata { 92 + let mut metadata = UrlMetadata { 93 + title: None, 94 + description: None, 95 + favicon: None, 96 + image: None, 97 + }; 98 + 99 + // Title: <title> tag 100 + if let Some(caps) = regex_lite::Regex::new(r"(?i)<title[^>]*>([^<]+)</title>") 101 + .ok() 102 + .and_then(|re| re.captures(html)) 103 + { 104 + metadata.title = Some(sanitize_text(&caps[1], MAX_TITLE_LENGTH)); 105 + } 106 + 107 + // Fallback: og:title 108 + if metadata.title.is_none() { 109 + metadata.title = extract_meta_content(html, "property", "og:title") 110 + .map(|s| sanitize_text(&s, MAX_TITLE_LENGTH)); 111 + } 112 + 113 + // Description: <meta name="description"> 114 + metadata.description = extract_meta_content(html, "name", "description") 115 + .map(|s| sanitize_text(&s, MAX_DESCRIPTION_LENGTH)); 116 + 117 + // Fallback: og:description 118 + if metadata.description.is_none() { 119 + metadata.description = 
extract_meta_content(html, "property", "og:description") 120 + .map(|s| sanitize_text(&s, MAX_DESCRIPTION_LENGTH)); 121 + } 122 + 123 + // Favicon: <link rel="icon" href="..."> 124 + if let Some(caps) = regex_lite::Regex::new( 125 + r#"(?i)<link[^>]+rel=["'](?:icon|shortcut icon)["'][^>]+?href=["']([^"']+)["']"#, 126 + ) 127 + .ok() 128 + .and_then(|re| re.captures(html)) 129 + { 130 + metadata.favicon = resolve_url(&caps[1], url); 131 + } 132 + if metadata.favicon.is_none() { 133 + metadata.favicon = Some(default_favicon(url)); 134 + } 135 + 136 + // Image: og:image 137 + if let Some(img) = extract_meta_content(html, "property", "og:image") { 138 + metadata.image = resolve_url(&img, url); 139 + } 140 + 141 + // Fallback: twitter:image 142 + if metadata.image.is_none() { 143 + if let Some(img) = extract_meta_content(html, "name", "twitter:image") { 144 + metadata.image = resolve_url(&img, url); 145 + } 21 146 } 22 147 23 - resp.json::<UrlMetadata>().await.into_diagnostic() 148 + // Fallback title: hostname 149 + if metadata.title.is_none() { 150 + metadata.title = url.host_str().map(|h| h.to_string()); 151 + } 152 + 153 + metadata 154 + } 155 + 156 + /// Extract content from <meta> tag matching attr_name=attr_value 157 + fn extract_meta_content(html: &str, attr_name: &str, attr_value: &str) -> Option<String> { 158 + // Try: <meta attr="value" content="..."> 159 + let pattern1 = format!( 160 + r#"(?i)<meta[^>]+{attr_name}=["']{attr_value}["'][^>]+content=["']([^"']+)["']"# 161 + ); 162 + if let Some(caps) = regex_lite::Regex::new(&pattern1) 163 + .ok() 164 + .and_then(|re| re.captures(html)) 165 + { 166 + return Some(caps[1].to_string()); 167 + } 168 + 169 + // Try: <meta content="..." 
attr="value"> 170 + let pattern2 = format!( 171 + r#"(?i)<meta[^>]+content=["']([^"']+)["'][^>]+{attr_name}=["']{attr_value}["']"# 172 + ); 173 + if let Some(caps) = regex_lite::Regex::new(&pattern2) 174 + .ok() 175 + .and_then(|re| re.captures(html)) 176 + { 177 + return Some(caps[1].to_string()); 178 + } 179 + 180 + None 24 181 }