experiments in a post-browser web
10
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat(tools): add session/tab import, visit records, frecency planning, tag-folders option

+877 -26
+2 -1
tools/browser-import/package.json
··· 14 14 "dependencies": { 15 15 "better-sqlite3": "^12.5.0", 16 16 "chalk": "^5.3.0", 17 - "commander": "^12.1.0" 17 + "commander": "^12.1.0", 18 + "lz4": "^0.6.5" 18 19 }, 19 20 "devDependencies": { 20 21 "@types/better-sqlite3": "^7.6.8",
+134 -2
tools/browser-import/src/browsers/chrome.ts
··· 3 3 * 4 4 * Both Chrome and Brave use the same Chromium profile format. 5 5 * History is in a SQLite DB, bookmarks in a JSON file. 6 + * Sessions use SNSS binary format (complex to parse). 6 7 */ 7 8 8 9 import { existsSync, readFileSync, copyFileSync, mkdtempSync } from 'fs'; ··· 10 11 import { tmpdir } from 'os'; 11 12 import Database from 'better-sqlite3'; 12 13 import type { BrowserReader } from './types.js'; 13 - import type { HistoryEntry, BookmarkEntry, DataInspection } from '../types.js'; 14 + import type { HistoryEntry, BookmarkEntry, SessionTab, DataInspection, VisitRecord } from '../types.js'; 14 15 15 16 /** 16 17 * Copy a SQLite DB to a temp location before reading. ··· 96 97 return entries; 97 98 } 98 99 100 + /** 101 + * Try to read session tabs from Chrome's Preferences JSON. 102 + * 103 + * Chrome's full session data is in SNSS binary format (Current Session, Current Tabs, 104 + * Sessions/Session_*, Sessions/Tabs_*), which is complex to parse without a dedicated 105 + * library. For now, we extract what we can from the Preferences JSON: 106 + * - Pinned tabs (session.startup_urls when restore_on_startup = 4) 107 + * - Tab groups metadata 108 + * 109 + * TODO: Full SNSS parsing. The SNSS format uses Pickle serialization with command types 110 + * for UpdateTabNavigation, SetSelectedTabInWindow, SetTabGroup, etc. No reliable npm 111 + * package exists for this format. Consider writing a custom parser or using the 112 + * chrome-session-parser approach (reverse-engineered format). 
113 + */ 114 + function readChromeSessionTabs(profilePath: string): SessionTab[] { 115 + const tabs: SessionTab[] = []; 116 + 117 + // Try reading Preferences for startup/pinned tabs 118 + const prefsPath = join(profilePath, 'Preferences'); 119 + if (!existsSync(prefsPath)) return tabs; 120 + 121 + try { 122 + const prefs = JSON.parse(readFileSync(prefsPath, 'utf-8')); 123 + 124 + // Check for startup URLs (when user has "Open a specific page or set of pages") 125 + const startupUrls = prefs?.session?.startup_urls; 126 + if (Array.isArray(startupUrls)) { 127 + for (const url of startupUrls) { 128 + if (typeof url === 'string' && !url.startsWith('chrome://') && 129 + !url.startsWith('chrome-extension://') && !url.startsWith('brave://')) { 130 + tabs.push({ 131 + url, 132 + title: '', // Preferences doesn't store titles for startup URLs 133 + pinned: false, 134 + }); 135 + } 136 + } 137 + } 138 + 139 + // Check for pinned tabs 140 + const pinnedTabs = prefs?.pinned_tabs; 141 + if (Array.isArray(pinnedTabs)) { 142 + for (const tab of pinnedTabs) { 143 + if (tab && tab.url && typeof tab.url === 'string' && 144 + !tab.url.startsWith('chrome://') && !tab.url.startsWith('brave://')) { 145 + tabs.push({ 146 + url: tab.url, 147 + title: tab.title || '', 148 + pinned: true, 149 + }); 150 + } 151 + } 152 + } 153 + 154 + // Extract tab group names if available (for tagging reference) 155 + // Chrome stores tab_groups in Preferences with group IDs mapping to titles 156 + const tabGroups = prefs?.tab_groups; 157 + if (tabGroups && typeof tabGroups === 'object') { 158 + // Tab group data is stored but can't be fully linked without SNSS session data 159 + // We store the group names for reference 160 + } 161 + } catch { 162 + // Can't parse preferences 163 + } 164 + 165 + return tabs; 166 + } 167 + 168 + /** 169 + * Count session tabs from Chrome Preferences 170 + */ 171 + function countChromeSessionTabs(profilePath: string): number { 172 + try { 173 + return 
readChromeSessionTabs(profilePath).length; 174 + } catch { 175 + return 0; 176 + } 177 + } 178 + 99 179 export const chromeReader: BrowserReader = { 100 180 readHistory(profilePath: string): HistoryEntry[] { 101 181 const dbPath = join(profilePath, 'History'); ··· 124 204 last_visit_time: number; 125 205 }>; 126 206 207 + // Build visit timeline from visits table 208 + const visitsByUrl = new Map<string, VisitRecord[]>(); 209 + try { 210 + const visitRows = db.prepare(` 211 + SELECT 212 + u.url, 213 + v.visit_time, 214 + v.transition 215 + FROM visits v 216 + JOIN urls u ON v.url = u.id 217 + WHERE u.url NOT LIKE 'chrome://%' 218 + AND u.url NOT LIKE 'chrome-extension://%' 219 + AND u.url NOT LIKE 'brave://%' 220 + AND u.url NOT LIKE 'edge://%' 221 + ORDER BY v.visit_time ASC 222 + `).all() as Array<{ 223 + url: string; 224 + visit_time: number; 225 + transition: number; 226 + }>; 227 + 228 + for (const vr of visitRows) { 229 + let visits = visitsByUrl.get(vr.url); 230 + if (!visits) { 231 + visits = []; 232 + visitsByUrl.set(vr.url, visits); 233 + } 234 + visits.push({ 235 + timestamp: chromeTimeToUnixMs(vr.visit_time), 236 + visitType: vr.transition & 0xFF, // Core transition type is in lower bits 237 + }); 238 + } 239 + } catch { 240 + // visits query failed, continue without individual visits 241 + } 242 + 127 243 return rows.map(row => ({ 128 244 url: row.url, 129 245 title: row.title || '', 130 246 visitCount: row.visit_count, 131 247 lastVisitTime: chromeTimeToUnixMs(row.last_visit_time), 248 + visits: visitsByUrl.get(row.url), 132 249 })); 133 250 } catch { 134 251 return []; ··· 160 277 } 161 278 }, 162 279 280 + readSessions(profilePath: string): SessionTab[] { 281 + return readChromeSessionTabs(profilePath); 282 + }, 283 + 163 284 inspect(profilePath: string): DataInspection[] { 164 285 const results: DataInspection[] = []; 165 286 ··· 187 308 const bookmarksPath = join(profilePath, 'Bookmarks'); 188 309 if (existsSync(bookmarksPath)) { 189 310 try { 190 
- const data = JSON.parse(readFileSync(bookmarksPath, 'utf-8')); 191 311 const entries = this.readBookmarks(profilePath); 192 312 results.push({ type: 'bookmarks', count: entries.length, importable: true, label: 'Bookmarks' }); 193 313 } catch { 194 314 results.push({ type: 'bookmarks', count: 0, importable: true, label: 'Bookmarks' }); 195 315 } 196 316 } 317 + 318 + // Sessions (from Preferences, partial) 319 + const sessionCount = countChromeSessionTabs(profilePath); 320 + const sessionLabel = sessionCount > 0 321 + ? 'Session Tabs (from Preferences, partial)' 322 + : 'Session Tabs (SNSS format, not yet supported)'; 323 + results.push({ 324 + type: 'sessions', 325 + count: sessionCount, 326 + importable: sessionCount > 0, 327 + label: sessionLabel, 328 + }); 197 329 198 330 // Passwords (Login Data SQLite) 199 331 const loginDb = openSafely(join(profilePath, 'Login Data'));
+144 -1
tools/browser-import/src/browsers/firefox.ts
··· 2 2 * Firefox profile reader 3 3 * 4 4 * Reads history and bookmarks from Firefox's places.sqlite, 5 + * sessions from sessionstore-backups/recovery.jsonlz4, 5 6 * and inspects other data files (passwords, cookies, form data, extensions). 6 7 */ 7 8 ··· 10 11 import { tmpdir } from 'os'; 11 12 import Database from 'better-sqlite3'; 12 13 import type { BrowserReader } from './types.js'; 13 - import type { HistoryEntry, BookmarkEntry, DataInspection } from '../types.js'; 14 + import type { HistoryEntry, BookmarkEntry, SessionTab, DataInspection, VisitRecord } from '../types.js'; 15 + import { decompressJsonlz4 } from '../util/jsonlz4.js'; 14 16 15 17 /** 16 18 * Copy a SQLite DB to a temp location before reading. ··· 42 44 } 43 45 } 44 46 47 + /** 48 + * Find the Firefox session file. Tries multiple locations in priority order: 49 + * 1. sessionstore-backups/recovery.jsonlz4 (most current, written every ~15s) 50 + * 2. sessionstore.jsonlz4 (written on clean shutdown) 51 + */ 52 + function findSessionFile(profilePath: string): string | null { 53 + const candidates = [ 54 + join(profilePath, 'sessionstore-backups', 'recovery.jsonlz4'), 55 + join(profilePath, 'sessionstore.jsonlz4'), 56 + ]; 57 + 58 + for (const candidate of candidates) { 59 + if (existsSync(candidate)) return candidate; 60 + } 61 + return null; 62 + } 63 + 64 + /** 65 + * Parse a Firefox session JSON structure into SessionTab entries. 66 + * Session structure: { windows: [{ tabs: [{ entries: [{ url, title }], index, ... 
}] }] } 67 + */ 68 + function parseFirefoxSession(sessionData: any): SessionTab[] { 69 + const tabs: SessionTab[] = []; 70 + 71 + if (!sessionData || !Array.isArray(sessionData.windows)) return tabs; 72 + 73 + for (let windowIndex = 0; windowIndex < sessionData.windows.length; windowIndex++) { 74 + const window = sessionData.windows[windowIndex]; 75 + if (!window || !Array.isArray(window.tabs)) continue; 76 + 77 + for (const tab of window.tabs) { 78 + if (!tab || !Array.isArray(tab.entries) || tab.entries.length === 0) continue; 79 + 80 + // Current entry is at index - 1 (1-based) 81 + const currentIndex = (tab.index || tab.entries.length) - 1; 82 + const entry = tab.entries[Math.min(currentIndex, tab.entries.length - 1)]; 83 + 84 + if (!entry || !entry.url) continue; 85 + 86 + // Skip internal Firefox URLs 87 + if (entry.url.startsWith('about:') || entry.url.startsWith('resource:') || 88 + entry.url.startsWith('chrome:') || entry.url.startsWith('moz-extension:')) { 89 + continue; 90 + } 91 + 92 + // Determine tab group name (Firefox 131+ tab groups) 93 + let tabGroup: string | undefined; 94 + if (tab.groupId !== undefined && tab.groupId !== -1 && window.tabGroups) { 95 + const group = window.tabGroups.find?.((g: any) => g.id === tab.groupId); 96 + if (group && group.name) { 97 + tabGroup = group.name; 98 + } 99 + } 100 + 101 + tabs.push({ 102 + url: entry.url, 103 + title: entry.title || '', 104 + tabGroup, 105 + pinned: !!tab.pinned, 106 + lastAccessed: tab.lastAccessed || undefined, 107 + windowIndex, 108 + }); 109 + } 110 + } 111 + 112 + return tabs; 113 + } 114 + 115 + /** 116 + * Count tabs in a Firefox session file without fully parsing 117 + */ 118 + function countSessionTabs(profilePath: string): number { 119 + const sessionFile = findSessionFile(profilePath); 120 + if (!sessionFile) return 0; 121 + 122 + try { 123 + const sessionData = decompressJsonlz4(sessionFile); 124 + const tabs = parseFirefoxSession(sessionData); 125 + return tabs.length; 126 + } 
catch { 127 + return 0; 128 + } 129 + } 130 + 45 131 export const firefoxReader: BrowserReader = { 46 132 readHistory(profilePath: string): HistoryEntry[] { 47 133 const dbPath = join(profilePath, 'places.sqlite'); ··· 49 135 if (!db) return []; 50 136 51 137 try { 138 + // Get history entries with aggregated visit data 52 139 const rows = db.prepare(` 53 140 SELECT 54 141 p.url, ··· 68 155 last_visit_date: number; 69 156 }>; 70 157 158 + // Build a map of place URL -> individual visits for visit timeline 159 + const visitsByUrl = new Map<string, VisitRecord[]>(); 160 + try { 161 + const visitRows = db.prepare(` 162 + SELECT 163 + p.url, 164 + v.visit_date, 165 + v.visit_type 166 + FROM moz_historyvisits v 167 + JOIN moz_places p ON v.place_id = p.id 168 + WHERE p.url NOT LIKE 'place:%' 169 + AND p.url NOT LIKE 'about:%' 170 + ORDER BY v.visit_date ASC 171 + `).all() as Array<{ 172 + url: string; 173 + visit_date: number; 174 + visit_type: number; 175 + }>; 176 + 177 + for (const vr of visitRows) { 178 + let visits = visitsByUrl.get(vr.url); 179 + if (!visits) { 180 + visits = []; 181 + visitsByUrl.set(vr.url, visits); 182 + } 183 + visits.push({ 184 + timestamp: Math.floor(vr.visit_date / 1000), // microseconds to ms 185 + visitType: vr.visit_type, 186 + }); 187 + } 188 + } catch { 189 + // visits query failed, continue without individual visits 190 + } 191 + 71 192 return rows.map(row => ({ 72 193 url: row.url, 73 194 title: row.title || '', 74 195 visitCount: row.visit_count, 75 196 // Firefox stores timestamps in microseconds since epoch 76 197 lastVisitTime: Math.floor(row.last_visit_date / 1000), 198 + visits: visitsByUrl.get(row.url), 77 199 })); 78 200 } catch { 79 201 return []; ··· 148 270 } 149 271 }, 150 272 273 + readSessions(profilePath: string): SessionTab[] { 274 + const sessionFile = findSessionFile(profilePath); 275 + if (!sessionFile) return []; 276 + 277 + try { 278 + const sessionData = decompressJsonlz4(sessionFile); 279 + return 
parseFirefoxSession(sessionData); 280 + } catch { 281 + return []; 282 + } 283 + }, 284 + 151 285 inspect(profilePath: string): DataInspection[] { 152 286 const results: DataInspection[] = []; 153 287 ··· 174 308 placesDb.close(); 175 309 } 176 310 } 311 + 312 + // Sessions 313 + const sessionTabCount = countSessionTabs(profilePath); 314 + results.push({ 315 + type: 'sessions', 316 + count: sessionTabCount, 317 + importable: true, 318 + label: 'Session Tabs', 319 + }); 177 320 178 321 // Passwords (logins.json) 179 322 const loginsPath = join(profilePath, 'logins.json');
+4 -1
tools/browser-import/src/browsers/types.ts
··· 2 2 * Shared browser reader types 3 3 */ 4 4 5 - import type { HistoryEntry, BookmarkEntry, DataInspection } from '../types.js'; 5 + import type { HistoryEntry, BookmarkEntry, SessionTab, DataInspection } from '../types.js'; 6 6 7 7 export interface BrowserReader { 8 8 /** Read history entries from the profile */ ··· 10 10 11 11 /** Read bookmark entries from the profile */ 12 12 readBookmarks(profilePath: string): BookmarkEntry[]; 13 + 14 + /** Read session/tab entries from the profile */ 15 + readSessions(profilePath: string): SessionTab[]; 13 16 14 17 /** Inspect what data is available in the profile */ 15 18 inspect(profilePath: string): DataInspection[];
+37
tools/browser-import/src/datastore.ts
/**
 * Get the item ID for a URL, or null if it doesn't exist.
 *
 * Only rows with type = 'url' and deletedAt = 0 are considered.
 *
 * @param url - Exact URL to look up (compared against `items.content`).
 * @returns The matching item's id, or null when no such row exists.
 * @throws Error if the database has not been opened.
 */
export function getItemIdByUrl(url: string): string | null {
  if (!db) throw new Error('Database not opened');
  const row = db.prepare(
    "SELECT id FROM items WHERE type = 'url' AND content = ? AND deletedAt = 0"
  ).get(url) as { id: string } | undefined;
  return row ? row.id : null;
}

/**
 * Update visit count and lastVisitAt on an existing item.
 *
 * Both columns only ever move upwards: the stored value is kept when it is
 * already greater than the incoming one.
 *
 * @param itemId - Id of the item to update.
 * @param visitCount - Visit count reported by the import source.
 * @param lastVisitAt - Most recent visit timestamp reported by the import source.
 * @throws Error if the database has not been opened.
 */
export function updateItemVisitData(itemId: string, visitCount: number, lastVisitAt: number): void {
  if (!db) throw new Error('Database not opened');
  // SQLite's multi-argument MAX() yields NULL as soon as one argument is NULL,
  // so a row with a NULL visitCount/lastVisitAt would stay NULL forever.
  // COALESCE treats a missing stored value as 0 so the incoming value wins.
  db.prepare(
    'UPDATE items SET visitCount = MAX(COALESCE(visitCount, 0), ?), lastVisitAt = MAX(COALESCE(lastVisitAt, 0), ?) WHERE id = ?'
  ).run(visitCount, lastVisitAt, itemId);
}

/**
 * Record a visit event for an item in the item_events table.
 *
 * The row is written with content = 'visit' and a NULL value. `timestamp` is
 * stored as `occurredAt`; `createdAt` is the time of the insert.
 *
 * @param itemId - Id of the item the visit belongs to.
 * @param timestamp - When the visit happened.
 * @param metadata - Optional event metadata; defaults to `{ eventType: 'visit' }`.
 * @throws Error if the database has not been opened.
 */
export function recordVisit(itemId: string, timestamp: number, metadata?: Record<string, unknown>): void {
  if (!db) throw new Error('Database not opened');

  const eventId = generateId('evt');
  const ts = now();
  const metadataJson = JSON.stringify(metadata ?? { eventType: 'visit' });

  db.prepare(`
    INSERT INTO item_events (id, itemId, content, value, occurredAt, metadata, createdAt)
    VALUES (?, ?, 'visit', NULL, ?, ?, ?)
  `).run(eventId, itemId, timestamp, metadataJson, ts);
}
+15 -10
tools/browser-import/src/importers/bookmarks.ts
··· 31 31 entries: BookmarkEntry[], 32 32 browserName: BrowserName, 33 33 profileName: string, 34 + options?: { tagFolders?: boolean }, 34 35 ): ImportResult { 36 + const tagFolders = options?.tagFolders !== false; // default true 37 + 35 38 const result: ImportResult = { 36 39 itemsImported: 0, 37 40 tagsCreated: 0, ··· 88 91 tagItem(itemId, profileTag.tagId); 89 92 tagItem(itemId, bookmarkTag.tagId); 90 93 91 - // Apply folder hierarchy tags 92 - const folderTags = folderPathToTags(entry.folderPath); 93 - for (const folderTagName of folderTags) { 94 - let folderTagId = folderTagCache.get(folderTagName); 95 - if (!folderTagId) { 96 - const { tagId, created } = getOrCreateTag(folderTagName); 97 - folderTagId = tagId; 98 - folderTagCache.set(folderTagName, tagId); 99 - if (created) result.tagsCreated++; 94 + // Apply folder hierarchy tags (when enabled) 95 + if (tagFolders) { 96 + const folderTags = folderPathToTags(entry.folderPath); 97 + for (const folderTagName of folderTags) { 98 + let folderTagId = folderTagCache.get(folderTagName); 99 + if (!folderTagId) { 100 + const { tagId, created } = getOrCreateTag(folderTagName); 101 + folderTagId = tagId; 102 + folderTagCache.set(folderTagName, tagId); 103 + if (created) result.tagsCreated++; 104 + } 105 + tagItem(itemId, folderTagId); 100 106 } 101 - tagItem(itemId, folderTagId); 102 107 } 103 108 104 109 result.itemsImported++;
+219
tools/browser-import/src/importers/frecency.ts
/**
 * Firefox Frecency & Adaptive Matching — Research and Planning
 *
 * This module reads Firefox's frecency scores and adaptive matching data
 * from places.sqlite. These provide rich ranking signals that can enhance
 * Peek's own frecency system.
 *
 * ============================================================
 * FIREFOX FRECENCY ALGORITHM
 * ============================================================
 *
 * The `frecency` column in `moz_places` is an integer score that combines
 * frequency (how often) and recency (how recently) a page was visited.
 *
 * How Firefox calculates frecency:
 *
 * 1. Take the last 10 visits for a place
 * 2. For each visit, assign a "bonus" based on visit type:
 *    - TRANSITION_LINK (1): 100 points (clicked a link)
 *    - TRANSITION_TYPED (2): 2000 points (typed in URL bar)
 *    - TRANSITION_BOOKMARK (3): 75 points (opened from bookmark)
 *    - TRANSITION_EMBED (4): 0 points (framed content)
 *    - TRANSITION_REDIRECT_PERMANENT (5): 0 points
 *    - TRANSITION_REDIRECT_TEMPORARY (6): 0 points
 *    - TRANSITION_DOWNLOAD (7): 0 points
 *    - Default: 0 points
 *
 * 3. Apply a recency "bucket weight" based on how long ago the visit was:
 *    - Within 4 days: 100% weight
 *    - Within 14 days: 70% weight
 *    - Within 31 days: 50% weight
 *    - Within 90 days: 30% weight
 *    - Older than 90 days: 10% weight
 *
 * 4. For each visit: points = ceil(bonus * bucketWeight / 100)
 * 5. Final frecency = ceil(total_points * visit_count / sample_size)
 *    where sample_size = min(visit_count, 10)
 *
 * A page that was typed (2000 bonus) within 4 days (100% weight)
 * scores 2000 per visit. A link-clicked page (100 bonus) from 2 months ago
 * (30% weight) scores 30 per visit.
 *
 * Special cases:
 * - Unvisited bookmarks get frecency = 140
 * - Pages with only embed/redirect visits get frecency = 0
 * - frecency = -1 is understood to be Firefox's "not yet calculated"
 *   sentinel rather than a real score (TODO: confirm for the Firefox
 *   versions being imported). `readFrecencyData` only returns rows with
 *   frecency > 0, so both 0 and -1 rows are skipped.
 *
 * ============================================================
 * ADAPTIVE MATCHING (moz_inputhistory)
 * ============================================================
 *
 * Firefox's `moz_inputhistory` table maps typed text to selected URLs.
 * When a user types "gi" in the URL bar and selects github.com, Firefox
 * records this association with a use count.
 *
 * Table structure:
 *   place_id   INTEGER — references moz_places.id
 *   input      TEXT    — the text the user typed
 *   use_count  INTEGER — how many times this text->URL association was used
 *
 * The adaptive matching data is decay-weighted: each time the entry is used,
 * use_count = (use_count * 0.9) + 1. Old associations decay toward 0.
 *
 * This is extremely valuable for command palette matching in Peek — it
 * captures the user's actual abbreviation patterns.
 *
 * ============================================================
 * TODO: MAPPING TO PEEK'S SYSTEM
 * ============================================================
 *
 * Peek has its own frecency system (items.frecencyScore column). Potential
 * integration approaches:
 *
 * 1. **Direct import**: Copy Firefox's frecency score into Peek's
 *    frecencyScore column. Simple but scores may not be comparable
 *    with Peek's internally-generated scores.
 *
 * 2. **Normalized import**: Scale Firefox frecency scores to match
 *    Peek's expected range. Need to analyze Peek's frecency calculation
 *    to determine the right normalization.
 *
 * 3. **Seed data**: Import Firefox frecency as metadata, then let Peek
 *    rebuild its own frecency using Firefox's visit data as a starting
 *    point. This is the most correct approach.
 *
 * For adaptive matching:
 *
 * 1. **Metadata storage**: Import input associations into item metadata
 *    as `{ adaptiveMatches: [{ input: "gi", useCount: 5.2 }, ...] }`.
 *    The cmd palette could read these for boosting.
 *
 * 2. **Separate table**: Create a dedicated search_hints table for
 *    input->URL associations. More structured but requires schema change.
 *
 * 3. **item_events**: Record each adaptive match as an item_event with
 *    content = input text. Preserves the timeline.
 *
 * Recommended approach: Option 3 (seed data) for frecency, Option 1
 * (metadata) for adaptive matching. The visit timeline is already being
 * imported into item_events, so Peek can recalculate frecency from that
 * raw data. The adaptive matches are small enough to fit in metadata.
 */

import { existsSync, copyFileSync, mkdtempSync, rmSync } from 'fs';
import { dirname, join } from 'path';
import { tmpdir } from 'os';
import Database from 'better-sqlite3';

/** One `moz_places` row reduced to its URL, title and frecency score. */
export interface FrecencyData {
  url: string;
  frecency: number;
  title: string;
}

/** One `moz_inputhistory` association joined to the URL it points at. */
export interface AdaptiveMatch {
  url: string;
  input: string;
  useCount: number;
}

/**
 * Remove a directory tree, ignoring any failure.
 * Cleanup is best-effort: a leftover temp directory must never fail an import.
 */
function removeDirQuietly(dir: string): void {
  try {
    rmSync(dir, { recursive: true, force: true });
  } catch {
    // Nothing useful to do if the temp directory cannot be removed.
  }
}

/**
 * Copy a SQLite DB to a temp location before reading.
 *
 * The `-wal` and `-shm` side files are copied too when present. The copy
 * lives in its own freshly created temp directory; the caller owns that
 * directory and is responsible for removing it (`dirname` of the result).
 *
 * @param dbPath - Path of the SQLite database to copy.
 * @returns Path of the copied database file.
 * @throws Whatever the underlying fs call throws; the partially filled temp
 *   directory is removed before rethrowing.
 */
function copyToTemp(dbPath: string): string {
  const tempDir = mkdtempSync(join(tmpdir(), 'browser-import-'));
  const tempPath = join(tempDir, 'copy.sqlite');
  try {
    copyFileSync(dbPath, tempPath);
    for (const suffix of ['-wal', '-shm']) {
      const sidecar = dbPath + suffix;
      if (existsSync(sidecar)) {
        copyFileSync(sidecar, tempPath + suffix);
      }
    }
  } catch (err) {
    removeDirQuietly(tempDir);
    throw err;
  }
  return tempPath;
}

/**
 * Run one read-only query against a temp copy of the profile's places.sqlite.
 *
 * Handles the whole lifecycle: copy, open, query, close, delete the copy.
 * Reading is best-effort: a missing database or any failure along the way
 * yields an empty result instead of throwing.
 *
 * @param profilePath - Firefox profile directory.
 * @param sql - SELECT statement to execute (no bound parameters).
 * @returns The result rows, or an empty array on any failure.
 */
function queryPlacesCopy<Row>(profilePath: string, sql: string): Row[] {
  const dbPath = join(profilePath, 'places.sqlite');
  if (!existsSync(dbPath)) return [];

  let tempPath: string | null = null;
  let db: Database.Database | null = null;
  try {
    tempPath = copyToTemp(dbPath);
    db = new Database(tempPath, { readonly: true });
    return db.prepare(sql).all() as Row[];
  } catch {
    return [];
  } finally {
    try {
      if (db) db.close();
    } catch {
      // A failed close must not mask the query result.
    }
    // The copy is only needed for the duration of the query.
    if (tempPath) removeDirQuietly(dirname(tempPath));
  }
}

/**
 * Read frecency scores from Firefox's places.sqlite.
 *
 * Returns all places with a positive frecency (excluding `place:` and
 * `about:` URLs), sorted by score descending. A NULL title becomes ''.
 *
 * @param profilePath - Firefox profile directory.
 * @returns Frecency rows, or an empty array if the database is missing or
 *   cannot be read.
 */
export function readFrecencyData(profilePath: string): FrecencyData[] {
  const rows = queryPlacesCopy<{
    url: string;
    title: string | null;
    frecency: number;
  }>(profilePath, `
      SELECT url, title, frecency
      FROM moz_places
      WHERE frecency > 0
        AND url NOT LIKE 'place:%'
        AND url NOT LIKE 'about:%'
      ORDER BY frecency DESC
    `);

  return rows.map(row => ({
    url: row.url,
    title: row.title || '',
    frecency: row.frecency,
  }));
}

/**
 * Read adaptive matching data from Firefox's places.sqlite.
 *
 * Returns all input->URL associations with their (decayed) use counts,
 * sorted by use count descending and excluding `place:` and `about:` URLs.
 * This data captures the user's typing patterns for URL bar autocomplete.
 *
 * @param profilePath - Firefox profile directory.
 * @returns Adaptive matches, or an empty array if the database is missing or
 *   cannot be read.
 */
export function readAdaptiveMatchingData(profilePath: string): AdaptiveMatch[] {
  const rows = queryPlacesCopy<{
    url: string;
    input: string;
    use_count: number;
  }>(profilePath, `
      SELECT
        p.url,
        h.input,
        h.use_count
      FROM moz_inputhistory h
      JOIN moz_places p ON h.place_id = p.id
      WHERE p.url NOT LIKE 'place:%'
        AND p.url NOT LIKE 'about:%'
      ORDER BY h.use_count DESC
    `);

  return rows.map(row => ({
    url: row.url,
    input: row.input,
    useCount: row.use_count,
  }));
}
+35 -3
tools/browser-import/src/importers/history.ts
··· 2 2 * History importer 3 3 * 4 4 * Reads browser history entries and creates url items in the Peek datastore. 5 + * Also records individual visits as item_events for full visit timeline. 5 6 */ 6 7 7 8 import type { HistoryEntry, ImportResult, BrowserName } from '../types.js'; 8 - import { itemExistsByUrl, createItem, getOrCreateTag, tagItem, runInTransaction } from '../datastore.js'; 9 + import { 10 + itemExistsByUrl, createItem, getOrCreateTag, tagItem, 11 + getItemIdByUrl, updateItemVisitData, recordVisit, 12 + runInTransaction, 13 + } from '../datastore.js'; 9 14 10 15 export function importHistory( 11 16 entries: HistoryEntry[], ··· 40 45 41 46 for (const entry of entries) { 42 47 try { 43 - // Skip if URL already exists 44 - if (itemExistsByUrl(entry.url)) { 48 + // Check if URL already exists 49 + const existingId = getItemIdByUrl(entry.url); 50 + 51 + if (existingId) { 52 + // Update visit data on existing item if our data is newer/more complete 53 + updateItemVisitData(existingId, entry.visitCount, entry.lastVisitTime); 54 + 55 + // Still record individual visits even for existing items 56 + if (entry.visits && entry.visits.length > 0) { 57 + for (const visit of entry.visits) { 58 + recordVisit(existingId, visit.timestamp, { 59 + eventType: 'visit', 60 + visitType: visit.visitType, 61 + sourceBrowser: browserName, 62 + }); 63 + } 64 + } 65 + 45 66 result.duplicatesSkipped++; 46 67 continue; 47 68 } ··· 67 88 tagItem(itemId, browserNameTag.tagId); 68 89 tagItem(itemId, profileTag.tagId); 69 90 tagItem(itemId, historyTag.tagId); 91 + 92 + // Record individual visits as item_events 93 + if (entry.visits && entry.visits.length > 0) { 94 + for (const visit of entry.visits) { 95 + recordVisit(itemId, visit.timestamp, { 96 + eventType: 'visit', 97 + visitType: visit.visitType, 98 + sourceBrowser: browserName, 99 + }); 100 + } 101 + } 70 102 71 103 result.itemsImported++; 72 104 } catch (err) {
+2
tools/browser-import/src/importers/index.ts
··· 4 4 5 5 export { importHistory } from './history.js'; 6 6 export { importBookmarks } from './bookmarks.js'; 7 + export { importSessions } from './sessions.js'; 8 + export { readFrecencyData, readAdaptiveMatchingData } from './frecency.js';
+117
tools/browser-import/src/importers/sessions.ts
/**
 * Session/tab importer
 *
 * Imports browser session tabs (open tabs across windows) as url items in the Peek datastore.
 * Every imported tab is tagged import:browser, import:{browser}, import:{profile},
 * browser:session and browser:tab. Pinned tabs additionally get browser:pinned,
 * and tabs that belong to a tab group get tab-group:{name}.
 */

import type { SessionTab, ImportResult, BrowserName } from '../types.js';
import { itemExistsByUrl, createItem, getOrCreateTag, tagItem, runInTransaction } from '../datastore.js';

/**
 * Import session tabs as `url` items inside a single transaction.
 *
 * Tabs whose URL already exists in the datastore are counted in
 * `duplicatesSkipped` and left untouched. A failure on one tab is recorded
 * in `errors` and does not stop the remaining tabs from being imported.
 *
 * @param tabs - Tabs read from the browser profile.
 * @param browserName - Source browser; used for the import:{browser} tag and item metadata.
 * @param profileName - Source profile; used for the import:{profile} tag and item metadata.
 * @returns Counts of imported items, created tags and skipped duplicates, plus error messages.
 */
export function importSessions(
  tabs: SessionTab[],
  browserName: BrowserName,
  profileName: string,
): ImportResult {
  const result: ImportResult = {
    itemsImported: 0,
    tagsCreated: 0,
    duplicatesSkipped: 0,
    errors: [],
  };

  if (tabs.length === 0) return result;

  const importTimestamp = new Date().toISOString();

  runInTransaction(() => {
    // Tag name -> tag id. Each tag is looked up or created at most once per
    // import, and `tagsCreated` is bumped only when it was actually created.
    const tagIds = new Map<string, string>();
    const ensureTag = (name: string): string => {
      const cached = tagIds.get(name);
      if (cached) return cached;
      const { tagId, created } = getOrCreateTag(name);
      tagIds.set(name, tagId);
      if (created) result.tagsCreated++;
      return tagId;
    };

    // Tags applied to every imported tab. They are created up front, even if
    // every tab later turns out to be a duplicate.
    const sharedTagIds = [
      'import:browser',
      `import:${browserName}`,
      `import:${profileName}`,
      'browser:session',
      'browser:tab',
    ].map(ensureTag);

    for (const tab of tabs) {
      try {
        // Skip if URL already exists
        if (itemExistsByUrl(tab.url)) {
          result.duplicatesSkipped++;
          continue;
        }

        const metadata: Record<string, unknown> = {
          title: tab.title,
          sourceProfile: profileName,
          sourceBrowser: browserName,
          importedAt: importTimestamp,
          importSource: 'session',
        };

        // Optional fields are only written when the reader supplied them.
        if (tab.pinned) metadata.pinned = true;
        if (tab.windowIndex !== undefined) metadata.windowIndex = tab.windowIndex;
        if (tab.lastAccessed) metadata.lastAccessed = tab.lastAccessed;
        if (tab.tabGroup) metadata.tabGroup = tab.tabGroup;

        const itemId = createItem({
          type: 'url',
          content: tab.url,
          title: tab.title,
          metadata,
        });

        for (const tagId of sharedTagIds) {
          tagItem(itemId, tagId);
        }

        // Pinned and tab-group tags are created lazily, on first use.
        if (tab.pinned) {
          tagItem(itemId, ensureTag('browser:pinned'));
        }
        if (tab.tabGroup) {
          tagItem(itemId, ensureTag(`tab-group:${tab.tabGroup}`));
        }

        result.itemsImported++;
      } catch (err) {
        // Anything can be thrown, so only read .message from real Errors.
        const message = err instanceof Error ? err.message : String(err);
        result.errors.push(`Failed to import session tab ${tab.url}: ${message}`);
      }
    }
  });

  return result;
}
+87 -6
tools/browser-import/src/index.ts
··· 18 18 import { discoverAllProfiles, findProfile } from './profiles.js'; 19 19 import { firefoxReader } from './browsers/firefox.js'; 20 20 import { chromeReader } from './browsers/chrome.js'; 21 - import { importHistory, importBookmarks } from './importers/index.js'; 21 + import { importHistory, importBookmarks, importSessions } from './importers/index.js'; 22 22 import { openDatabase, closeDatabase } from './datastore.js'; 23 23 import type { BrowserProfile, BrowserName, ImportType } from './types.js'; 24 24 import type { BrowserReader } from './browsers/types.js'; ··· 27 27 28 28 program 29 29 .name('browser-import') 30 - .description('Import browser history and bookmarks into a Peek datastore') 30 + .description('Import browser history, bookmarks, and sessions into a Peek datastore') 31 31 .version('1.0.0'); 32 32 33 33 // ==================== discover ==================== ··· 121 121 .description('Import browser data into a Peek datastore') 122 122 .argument('<profile>', 'Profile path or "browser/name"') 123 123 .option('--db <path>', 'Path to Peek database file', './peek.db') 124 - .option('--types <types>', 'Comma-separated import types: history,bookmarks', 'history,bookmarks') 124 + .option('--types <types>', 'Comma-separated import types: history,bookmarks,sessions', 'history,bookmarks,sessions') 125 + .option('--tag-folders', 'Create tags from bookmark folder hierarchy (default: true)', true) 126 + .option('--no-tag-folders', 'Skip creating tags from bookmark folder hierarchy') 125 127 .option('--dry-run', 'Show what would be imported without writing', false) 126 - .action((profileQuery: string, options: { db: string; types: string; dryRun: boolean }) => { 128 + .action((profileQuery: string, options: { 129 + db: string; 130 + types: string; 131 + dryRun: boolean; 132 + tagFolders: boolean; 133 + }) => { 127 134 const profiles = discoverAllProfiles(); 128 135 const profile = findProfile(profileQuery, profiles); 129 136 ··· 146 153 
console.log(chalk.dim(` Source: ${profile.profilePath}`)); 147 154 console.log(chalk.dim(` Target: ${dbPath}`)); 148 155 console.log(chalk.dim(` Types: ${importTypes.join(', ')}`)); 156 + if (importTypes.includes('bookmarks')) { 157 + console.log(chalk.dim(` Tag folders: ${options.tagFolders ? 'yes' : 'no'}`)); 158 + } 149 159 console.log(); 150 160 151 161 if (options.dryRun) { ··· 169 179 const entries = reader.readHistory(profile.profilePath); 170 180 console.log(chalk.dim(` Found ${entries.length.toLocaleString()} entries`)); 171 181 182 + const totalVisits = entries.reduce((sum, e) => sum + (e.visits?.length || 0), 0); 183 + if (totalVisits > 0) { 184 + console.log(chalk.dim(` Including ${totalVisits.toLocaleString()} individual visit records`)); 185 + } 186 + 172 187 if (entries.length > 0) { 173 188 console.log(chalk.cyan('Importing history...')); 174 189 const result = importHistory(entries, profile.browser, profile.profileName); ··· 202 217 203 218 if (entries.length > 0) { 204 219 console.log(chalk.cyan('Importing bookmarks...')); 205 - const result = importBookmarks(entries, profile.browser, profile.profileName); 220 + const result = importBookmarks(entries, profile.browser, profile.profileName, { 221 + tagFolders: options.tagFolders, 222 + }); 223 + totalImported += result.itemsImported; 224 + totalTags += result.tagsCreated; 225 + totalDuplicates += result.duplicatesSkipped; 226 + totalErrors += result.errors.length; 227 + 228 + console.log(chalk.green(` Imported: ${result.itemsImported.toLocaleString()}`)); 229 + if (result.duplicatesSkipped > 0) { 230 + console.log(chalk.yellow(` Skipped (duplicates): ${result.duplicatesSkipped.toLocaleString()}`)); 231 + } 232 + if (result.errors.length > 0) { 233 + console.log(chalk.red(` Errors: ${result.errors.length}`)); 234 + for (const err of result.errors.slice(0, 5)) { 235 + console.log(chalk.red(` ${err}`)); 236 + } 237 + } 238 + } 239 + console.log(); 240 + } 241 + 242 + // Import sessions 243 + if 
(importTypes.includes('sessions')) { 244 + console.log(chalk.cyan('Reading session tabs...')); 245 + const tabs = reader.readSessions(profile.profilePath); 246 + console.log(chalk.dim(` Found ${tabs.length.toLocaleString()} tabs`)); 247 + 248 + if (tabs.length > 0) { 249 + const pinnedCount = tabs.filter(t => t.pinned).length; 250 + const groupedCount = tabs.filter(t => t.tabGroup).length; 251 + const windowCount = new Set(tabs.map(t => t.windowIndex)).size; 252 + 253 + if (pinnedCount > 0) console.log(chalk.dim(` Pinned: ${pinnedCount}`)); 254 + if (groupedCount > 0) console.log(chalk.dim(` In tab groups: ${groupedCount}`)); 255 + if (windowCount > 1) console.log(chalk.dim(` Across ${windowCount} windows`)); 256 + 257 + console.log(chalk.cyan('Importing session tabs...')); 258 + const result = importSessions(tabs, profile.browser, profile.profileName); 206 259 totalImported += result.itemsImported; 207 260 totalTags += result.tagsCreated; 208 261 totalDuplicates += result.duplicatesSkipped; ··· 268 321 const entries = reader.readHistory(profile.profilePath); 269 322 console.log(` History: ${chalk.white(entries.length.toLocaleString())} entries would be imported`); 270 323 if (entries.length > 0) { 324 + const totalVisits = entries.reduce((sum, e) => sum + (e.visits?.length || 0), 0); 325 + if (totalVisits > 0) { 326 + console.log(chalk.dim(` Including ${totalVisits.toLocaleString()} individual visit records`)); 327 + } 271 328 console.log(chalk.dim(' Sample entries:')); 272 329 for (const entry of entries.slice(0, 5)) { 273 - console.log(chalk.dim(` ${entry.title || entry.url}`)); 330 + console.log(chalk.dim(` ${entry.title || entry.url} (${entry.visitCount} visits)`)); 274 331 } 275 332 } 276 333 } ··· 283 340 for (const entry of entries.slice(0, 5)) { 284 341 const folder = entry.folderPath ? 
` [${entry.folderPath}]` : ''; 285 342 console.log(chalk.dim(` ${entry.title || entry.url}${folder}`)); 343 + } 344 + } 345 + } 346 + 347 + if (importTypes.includes('sessions')) { 348 + const tabs = reader.readSessions(profile.profilePath); 349 + console.log(` Sessions: ${chalk.white(tabs.length.toLocaleString())} tabs would be imported`); 350 + if (tabs.length > 0) { 351 + const pinnedCount = tabs.filter(t => t.pinned).length; 352 + const groupedCount = tabs.filter(t => t.tabGroup).length; 353 + const windowCount = new Set(tabs.map(t => t.windowIndex)).size; 354 + 355 + if (pinnedCount > 0) console.log(chalk.dim(` Pinned: ${pinnedCount}`)); 356 + if (groupedCount > 0) console.log(chalk.dim(` In tab groups: ${groupedCount}`)); 357 + if (windowCount > 1) console.log(chalk.dim(` Across ${windowCount} windows`)); 358 + 359 + console.log(chalk.dim(' Sample tabs:')); 360 + for (const tab of tabs.slice(0, 5)) { 361 + const flags = [ 362 + tab.pinned ? 'pinned' : '', 363 + tab.tabGroup ? `group:${tab.tabGroup}` : '', 364 + ].filter(Boolean).join(', '); 365 + const flagStr = flags ? ` (${flags})` : ''; 366 + console.log(chalk.dim(` ${tab.title || tab.url}${flagStr}`)); 286 367 } 287 368 } 288 369 }
+2
tools/browser-import/src/profiles.ts
··· 38 38 const FIREFOX_DATA_FILES: Array<{ type: DataType; filename: string; importable: boolean }> = [ 39 39 { type: 'history', filename: 'places.sqlite', importable: true }, 40 40 { type: 'bookmarks', filename: 'places.sqlite', importable: true }, 41 + { type: 'sessions', filename: 'sessionstore-backups', importable: true }, 41 42 { type: 'passwords', filename: 'logins.json', importable: false }, 42 43 { type: 'cookies', filename: 'cookies.sqlite', importable: false }, 43 44 { type: 'formdata', filename: 'formhistory.sqlite', importable: false }, ··· 48 49 const CHROMIUM_DATA_FILES: Array<{ type: DataType; filename: string; importable: boolean }> = [ 49 50 { type: 'history', filename: 'History', importable: true }, 50 51 { type: 'bookmarks', filename: 'Bookmarks', importable: true }, 52 + { type: 'sessions', filename: 'Preferences', importable: true }, 51 53 { type: 'passwords', filename: 'Login Data', importable: false }, 52 54 { type: 'cookies', filename: 'Cookies', importable: false }, 53 55 { type: 'formdata', filename: 'Web Data', importable: false },
+19 -2
tools/browser-import/src/types.ts
··· 18 18 importable: boolean; 19 19 } 20 20 21 - export type DataType = 'history' | 'bookmarks' | 'passwords' | 'cookies' | 'formdata' | 'extensions'; 21 + export type DataType = 'history' | 'bookmarks' | 'sessions' | 'passwords' | 'cookies' | 'formdata' | 'extensions'; 22 22 23 23 export interface DataInspection { 24 24 type: DataType; ··· 32 32 title: string; 33 33 visitCount: number; 34 34 lastVisitTime: number; // Unix ms 35 + /** Individual visit timestamps (Firefox moz_historyvisits, Chrome visits table) */ 36 + visits?: VisitRecord[]; 37 + } 38 + 39 + export interface VisitRecord { 40 + timestamp: number; // Unix ms 41 + /** Firefox visit type: 1=LINK, 2=TYPED, 3=BOOKMARK, etc. */ 42 + visitType?: number; 35 43 } 36 44 37 45 export interface BookmarkEntry { ··· 41 49 dateAdded: number; // Unix ms 42 50 } 43 51 52 + export interface SessionTab { 53 + url: string; 54 + title: string; 55 + tabGroup?: string; 56 + pinned?: boolean; 57 + lastAccessed?: number; // Unix ms 58 + windowIndex?: number; 59 + } 60 + 44 61 export interface ImportResult { 45 62 itemsImported: number; 46 63 tagsCreated: number; ··· 48 65 errors: string[]; 49 66 } 50 67 51 - export type ImportType = 'history' | 'bookmarks'; 68 + export type ImportType = 'history' | 'bookmarks' | 'sessions';
+60
tools/browser-import/src/util/jsonlz4.ts
/**
 * Mozilla jsonlz4 decompressor
 *
 * Firefox uses a custom compression format for session files (.jsonlz4):
 * - 8-byte magic header: "mozLz40\0"
 * - 4-byte little-endian uint32: uncompressed size
 * - Remaining bytes: raw LZ4 block-compressed data
 *
 * This is NOT standard LZ4 framing — it's a raw LZ4 block with a Mozilla-specific header.
 * Mozilla adopted LZ4 before the framing standard was finalized, so they use their own wrapper.
 */

import { readFileSync } from 'fs';
// @ts-ignore — lz4 doesn't have great type definitions
import * as lz4 from 'lz4';

/** Magic bytes that open every jsonlz4 file (8 bytes, including the trailing NUL). */
const MOZLZ4_MAGIC = 'mozLz40\0';

/** Total header size: the 8-byte magic followed by the 4-byte uncompressed length. */
const MOZLZ4_HEADER_SIZE = MOZLZ4_MAGIC.length + 4;

/**
 * Decompress a .jsonlz4 file and parse the JSON contents.
 *
 * The file is read synchronously in full, the header is validated, the raw
 * LZ4 block is decoded into a buffer sized from the header, and the decoded
 * bytes are parsed as UTF-8 JSON. The parsed value is returned as-is with no
 * schema validation; callers must check its shape before relying on it.
 *
 * @param filePath Path to the .jsonlz4 file
 * @returns Parsed JSON object
 * @throws Error if the file is too short, the magic header does not match,
 *   or LZ4 decoding reports a failure. Errors from reading the file or from
 *   `JSON.parse` propagate unchanged.
 */
export function decompressJsonlz4(filePath: string): any {
  const buf = readFileSync(filePath);

  // The header alone needs 12 bytes; anything shorter cannot be valid.
  if (buf.length < MOZLZ4_HEADER_SIZE) {
    throw new Error(`File too small to be jsonlz4: ${buf.length} bytes`);
  }

  // Decode as latin1, not ascii: Node's 'ascii' decoder clears the high bit of
  // every byte, so a corrupted header such as 0xED 0xEF ... would be read back
  // as "mo..." and wrongly accepted. latin1 maps each byte to exactly one code
  // unit, which makes the string comparison byte-exact.
  const magic = buf.subarray(0, MOZLZ4_MAGIC.length).toString('latin1');
  if (magic !== MOZLZ4_MAGIC) {
    throw new Error(
      `Invalid jsonlz4 magic header: expected "mozLz40\\0", got "${magic}"`
    );
  }

  // Uncompressed size (4-byte LE uint32) sits immediately after the magic.
  const uncompressedSize = buf.readUInt32LE(MOZLZ4_MAGIC.length);

  // Everything after the 12-byte header is the raw LZ4 block.
  const compressed = buf.subarray(MOZLZ4_HEADER_SIZE);

  // Allocate the output buffer with the size declared in the header.
  const output = Buffer.alloc(uncompressedSize);

  // Decode the raw LZ4 block; a negative result is treated as a failure whose
  // magnitude is reported as the failing offset.
  const decodedSize = lz4.decodeBlock(compressed, output);

  if (decodedSize < 0) {
    throw new Error(`LZ4 decompression failed at offset ${Math.abs(decodedSize)}`);
  }

  // Only the bytes actually produced by the decoder are parsed as UTF-8 JSON.
  const jsonString = output.subarray(0, decodedSize).toString('utf-8');
  return JSON.parse(jsonString);
}