experiments in a post-browser web
10
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat(tools): implement normalized frecency import with adaptive matching

+337 -41
+37
tools/browser-import/src/datastore.ts
/**
 * Update the frecencyScore for an item.
 *
 * Also refreshes the item's `updatedAt` column with the current `now()` value.
 *
 * @param itemId - `id` of the row in the `items` table.
 * @param score - New value for the `frecencyScore` column.
 * @throws Error if the database has not been opened.
 */
export function updateFrecencyScore(itemId: string, score: number): void {
  if (!db) throw new Error('Database not opened');
  db.prepare(
    'UPDATE items SET frecencyScore = ?, updatedAt = ? WHERE id = ?'
  ).run(score, now(), itemId);
}

/**
 * Merge a key into an item's existing metadata JSON.
 * Reads the current metadata, adds/overwrites the given key, writes back.
 *
 * If the item does not exist this is a no-op. If the stored metadata is
 * missing, unparseable, or is valid JSON that is not a plain object
 * (`null`, an array, a string, a number, ...), the merge starts from an
 * empty object so the new key is always persisted.
 *
 * @param itemId - `id` of the row in the `items` table.
 * @param key - Metadata property to add or overwrite.
 * @param value - JSON-serializable value stored under `key`.
 * @throws Error if the database has not been opened.
 */
export function updateItemMetadata(itemId: string, key: string, value: unknown): void {
  if (!db) throw new Error('Database not opened');

  const row = db.prepare(
    'SELECT metadata FROM items WHERE id = ?'
  ).get(itemId) as { metadata: string } | undefined;

  if (!row) return;

  let metadata: Record<string, unknown> = {};
  try {
    const parsed: unknown = JSON.parse(row.metadata || '{}');
    // JSON.parse can legally return null, arrays or primitives. Assigning a
    // property on those either throws or is silently dropped when the value
    // is stringified again, so only a plain object is accepted as the base.
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      metadata = parsed as Record<string, unknown>;
    }
  } catch {
    // Corrupt JSON: fall back to the empty object initialised above.
  }

  metadata[key] = value;

  db.prepare(
    'UPDATE items SET metadata = ?, updatedAt = ? WHERE id = ?'
  ).run(JSON.stringify(metadata), now(), itemId);
}

// ··· closeDatabase() follows in the full file; its body is truncated in this view ···
+230 -37
tools/browser-import/src/importers/frecency.ts
/**
 * Firefox Frecency & Adaptive Matching — Import Module
 *
 * Reads Firefox's frecency scores and adaptive matching data from places.sqlite,
 * normalizes scores to Peek's scale, and updates imported items.
 *
 * For Chrome/Brave, calculates a frecency proxy from visit count + recency.
 *
 * ============================================================
 * FIREFOX FRECENCY ALGORITHM
 * ··· (section elided in this view) ···
 * The adaptive matching data is decay-weighted: each time the entry is used,
 * use_count = (use_count * 0.9) + 1. Old associations decay toward 0.
 *
 * This is valuable for command palette matching in Peek — it captures the
 * user's actual abbreviation patterns.
 *
 * ============================================================
 * NORMALIZATION APPROACH
 * ============================================================
 *
 * Peek's calculateItemFrecency produces scores by summing per-visit
 * contributions: weight * decay * 10, where decay = 1/(1 + sqrt(ageDays/7)).
 * For a heavily used site with 10 recent visits, scores reach ~200.
 * Realistic range: 0-2000 for very active users.
 *
 * Firefox scores range from 0 to 100,000+. We normalize by mapping
 * Firefox's observed min/max range onto 0-2000 (Peek's practical max),
 * using a logarithmic scale since Firefox scores are heavily skewed.
 */

import { existsSync, copyFileSync, mkdtempSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import Database from 'better-sqlite3';
import type { BrowserName, FrecencyImportResult } from '../types.js';
import {
  getItemIdByUrl, updateFrecencyScore, updateItemMetadata, runInTransaction,
} from '../datastore.js';

// ··· The FrecencyData and AdaptiveMatch interface declarations sit here in the
// full file; they are truncated in this view and are intentionally not
// reconstructed ···

/**
 * Stats returned by inspect for frecency data
 */
export interface FrecencyInspection {
  /** Number of URLs for which a frecency score (or proxy score) is available. */
  frecencyCount: number;
  /** Number of adaptive-matching rows available (always 0 for Chrome/Brave). */
  adaptiveCount: number;
}

// ··· copyToTemp, readFrecencyData and readAdaptiveMatchingData are defined
// here in the full file; they are elided in this view ···

/**
 * Read Chrome/Brave history and derive a frecency proxy per URL.
 *
 * Opens a temporary copy of the profile's `History` database read-only and
 * returns one `FrecencyData` entry per visited URL, ordered by visit count
 * (highest first). The `frecency` field is a proxy computed as
 * `round(visitCount * decay * 10)` with `decay = 1 / (1 + sqrt(ageDays / 7))`,
 * where `ageDays` is the age of the last visit.
 *
 * Best-effort: returns an empty array when the `History` file is missing or
 * when reading it fails for any reason.
 *
 * @param profilePath - Directory of the Chrome/Brave profile.
 * @returns Proxy-scored entries, or `[]` if nothing could be read.
 */
export function readChromeFrecencyData(profilePath: string): FrecencyData[] {
  const dbPath = join(profilePath, 'History');
  if (!existsSync(dbPath)) return [];

  let db: Database.Database | null = null;
  try {
    const tempPath = copyToTemp(dbPath);
    db = new Database(tempPath, { readonly: true });

    // last_visit_time is converted in SQL: microseconds -> milliseconds, then
    // shifted by the 1601 -> 1970 epoch offset (11644473600 s) so the result
    // is directly comparable with Date.now().
    const rows = db.prepare(`
      SELECT url, title, visit_count,
        (last_visit_time / 1000 - 11644473600000) as last_visit_ms
      FROM urls
      WHERE visit_count > 0
        AND url NOT LIKE 'chrome://%'
        AND url NOT LIKE 'chrome-extension://%'
        AND url NOT LIKE 'brave://%'
      ORDER BY visit_count DESC
    `).all() as Array<{
      url: string;
      title: string | null;
      visit_count: number;
      last_visit_ms: number;
    }>;

    const msPerDay = 1000 * 60 * 60 * 24;
    // Read the clock once so every row is aged against the same instant.
    const nowMs = Date.now();

    // Calculate a frecency proxy using Peek's own algorithm approach:
    // score = visitCount * decay, where decay is based on recency
    return rows.map(row => {
      // Clamp to 0 so a last-visit timestamp in the future cannot boost the score.
      const ageDays = Math.max(0, (nowMs - row.last_visit_ms) / msPerDay);
      const decay = 1 / (1 + Math.sqrt(ageDays / 7));
      // Scale similar to Peek: each visit contributes decay * 10
      const score = Math.round(row.visit_count * decay * 10);
      return {
        url: row.url,
        title: row.title || '',
        frecency: score,
      };
    });
  } catch {
    // Deliberate best-effort: an unreadable history yields no data.
    return [];
  } finally {
    if (db) db.close();
  }
}

// Peek's practical frecency max — heavily used sites with many recent visits
const PEEK_FRECENCY_MAX = 2000;

/**
 * Normalize Firefox frecency scores to Peek's range (0 to PEEK_FRECENCY_MAX).
 *
 * Uses log scale because Firefox scores are heavily skewed:
 * most pages score < 100, while typed URLs can score > 20,000.
 *
 * Robustness notes:
 * - min/max are found with a plain loop, not `Math.min(...array)`;
 *   spreading a very large history into a call can exceed the engine's
 *   argument limit and throw a RangeError.
 * - Negative or non-finite scores are treated as 0 before taking the log,
 *   so a single bad row (log1p(-1) is -Infinity, below -1 is NaN) cannot
 *   corrupt the range for every other URL. Firefox is known to use negative
 *   placeholder values; whether readFrecencyData already filters them is not
 *   visible from here.
 *
 * @param data - Raw Firefox frecency rows.
 * @returns Map from URL to an integer score in [0, PEEK_FRECENCY_MAX]. When
 *   all scores are equal, every URL gets PEEK_FRECENCY_MAX / 2. If a URL
 *   appears more than once, the last occurrence wins.
 */
function normalizeFirefoxScores(data: FrecencyData[]): Map<string, number> {
  const result = new Map<string, number>();
  if (data.length === 0) return result;

  // Use log scale for normalization
  const scored: Array<{ url: string; logScore: number }> = [];
  let logMin = Infinity;
  let logMax = -Infinity;
  for (const entry of data) {
    const safeScore = Number.isFinite(entry.frecency) ? Math.max(0, entry.frecency) : 0;
    const logScore = Math.log1p(safeScore);
    if (logScore < logMin) logMin = logScore;
    if (logScore > logMax) logMax = logScore;
    scored.push({ url: entry.url, logScore });
  }
  const logRange = logMax - logMin;

  for (const { url, logScore } of scored) {
    // Map log(score) from [logMin, logMax] to [0, PEEK_FRECENCY_MAX]
    const normalized = logRange > 0
      ? Math.round(((logScore - logMin) / logRange) * PEEK_FRECENCY_MAX)
      : Math.round(PEEK_FRECENCY_MAX / 2);
    result.set(url, normalized);
  }

  return result;
}

/**
 * Group adaptive matches by URL for efficient metadata storage.
 *
 * @param matches - Flat list of (url, input, useCount) associations.
 * @returns Map from URL to its `{ input, useCount }` pairs, in input order.
 */
function groupAdaptiveByUrl(matches: AdaptiveMatch[]): Map<string, Array<{ input: string; useCount: number }>> {
  const grouped = new Map<string, Array<{ input: string; useCount: number }>>();
  for (const match of matches) {
    const entry = { input: match.input, useCount: match.useCount };
    const existing = grouped.get(match.url);
    if (existing) {
      existing.push(entry);
    } else {
      grouped.set(match.url, [entry]);
    }
  }
  return grouped;
}

/**
 * Inspect frecency data available in a profile without importing.
 *
 * @param profilePath - Directory of the browser profile.
 * @param browser - Which browser the profile belongs to; anything other than
 *   'firefox' is read as a Chrome/Brave profile.
 * @returns Counts of scored URLs and adaptive-matching rows.
 */
export function inspectFrecency(profilePath: string, browser: BrowserName): FrecencyInspection {
  if (browser === 'firefox') {
    const frecencyData = readFrecencyData(profilePath);
    const adaptiveData = readAdaptiveMatchingData(profilePath);
    return {
      frecencyCount: frecencyData.length,
      adaptiveCount: adaptiveData.length,
    };
  }

  // Chrome/Brave — frecency proxy from visit data, no adaptive matching
  const frecencyData = readChromeFrecencyData(profilePath);
  return {
    frecencyCount: frecencyData.length,
    adaptiveCount: 0,
  };
}

/**
 * Import frecency scores and adaptive matching data into Peek's datastore.
 *
 * For Firefox: normalizes frecency scores from Firefox's scale to Peek's range,
 * and imports adaptive matching data as item metadata.
 *
 * For Chrome/Brave: calculates a frecency proxy from visit count + recency.
 *
 * Only updates items that already exist in the database (matched by URL).
 *
 * Per-URL failures are caught and recorded in `errors`, so one bad row does
 * not abort the surrounding transaction callback. If no frecency data can be
 * read, the function returns early with a single error message and the
 * adaptive-matching step is skipped as well.
 *
 * @param profilePath - Directory of the browser profile.
 * @param browser - Which browser the profile belongs to.
 * @returns Counters for the run. Note that `adaptiveImported` counts items
 *   (URLs) that received metadata, not individual input associations.
 */
export function importFrecency(
  profilePath: string,
  browser: BrowserName,
): FrecencyImportResult {
  const result: FrecencyImportResult = {
    frecencyUpdated: 0,
    adaptiveImported: 0,
    notFound: 0,
    errors: [],
  };

  // Thrown values are not guaranteed to be Error instances.
  const describe = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // Step 1: Read and normalize frecency data
  let normalizedScores: Map<string, number>;

  if (browser === 'firefox') {
    const frecencyData = readFrecencyData(profilePath);
    if (frecencyData.length === 0) {
      result.errors.push('No frecency data found in Firefox profile');
      return result;
    }
    normalizedScores = normalizeFirefoxScores(frecencyData);
  } else {
    // Chrome/Brave — scores are already in Peek-compatible range
    const frecencyData = readChromeFrecencyData(profilePath);
    if (frecencyData.length === 0) {
      result.errors.push(`No frecency data found in ${browser} profile`);
      return result;
    }
    normalizedScores = new Map(frecencyData.map(d => [d.url, d.frecency]));
  }

  // Step 2: Update frecency scores for matching items
  runInTransaction(() => {
    for (const [url, score] of normalizedScores) {
      try {
        const itemId = getItemIdByUrl(url);
        if (itemId) {
          updateFrecencyScore(itemId, score);
          result.frecencyUpdated++;
        } else {
          result.notFound++;
        }
      } catch (err) {
        result.errors.push(`Frecency update failed for ${url}: ${describe(err)}`);
      }
    }
  });

  // Step 3: Import adaptive matching data (Firefox only)
  if (browser === 'firefox') {
    const adaptiveData = readAdaptiveMatchingData(profilePath);
    if (adaptiveData.length > 0) {
      const grouped = groupAdaptiveByUrl(adaptiveData);

      runInTransaction(() => {
        for (const [url, matches] of grouped) {
          try {
            const itemId = getItemIdByUrl(url);
            if (itemId) {
              // Sort matches by useCount descending for readability
              matches.sort((a, b) => b.useCount - a.useCount);
              updateItemMetadata(itemId, 'adaptiveMatches', matches);
              result.adaptiveImported++;
            }
            // Don't increment notFound again — already counted in frecency pass
          } catch (err) {
            result.errors.push(`Adaptive match import failed for ${url}: ${describe(err)}`);
          }
        }
      });
    }
  }

  return result;
}
+1 -1
tools/browser-import/src/importers/index.ts
··· 5 5 export { importHistory } from './history.js'; 6 6 export { importBookmarks } from './bookmarks.js'; 7 7 export { importSessions } from './sessions.js'; 8 - export { readFrecencyData, readAdaptiveMatchingData } from './frecency.js'; 8 + export { readFrecencyData, readAdaptiveMatchingData, importFrecency, inspectFrecency } from './frecency.js';
+61 -2
tools/browser-import/src/index.ts
··· 18 18 import { discoverAllProfiles, findProfile } from './profiles.js'; 19 19 import { firefoxReader } from './browsers/firefox.js'; 20 20 import { chromeReader } from './browsers/chrome.js'; 21 - import { importHistory, importBookmarks, importSessions } from './importers/index.js'; 21 + import { importHistory, importBookmarks, importSessions, importFrecency, inspectFrecency } from './importers/index.js'; 22 22 import { openDatabase, closeDatabase } from './datastore.js'; 23 23 import type { BrowserProfile, BrowserName, ImportType } from './types.js'; 24 24 import type { BrowserReader } from './browsers/types.js'; ··· 110 110 console.log(` ${icon} ${item.label}: ${count} entries ${status}`); 111 111 } 112 112 113 + // Show frecency stats 114 + const frecencyStats = inspectFrecency(profile.profilePath, profile.browser); 115 + if (frecencyStats.frecencyCount > 0) { 116 + console.log(` ${chalk.green('\u2713')} Frecency: ${chalk.white(frecencyStats.frecencyCount.toLocaleString())} scored URLs ${chalk.green('(importable, opt-in)')}`); 117 + } 118 + if (frecencyStats.adaptiveCount > 0) { 119 + console.log(` ${chalk.green('\u2713')} Adaptive Matching: ${chalk.white(frecencyStats.adaptiveCount.toLocaleString())} input associations ${chalk.green('(importable, opt-in)')}`); 120 + } 121 + 113 122 console.log(); 114 123 console.log(chalk.dim('Use "browser-import import <profile>" to import data.')); 115 124 }); ··· 121 130 .description('Import browser data into a Peek datastore') 122 131 .argument('<profile>', 'Profile path or "browser/name"') 123 132 .option('--db <path>', 'Path to Peek database file', './peek.db') 124 - .option('--types <types>', 'Comma-separated import types: history,bookmarks,sessions', 'history,bookmarks,sessions') 133 + .option('--types <types>', 'Comma-separated import types: history,bookmarks,sessions,frecency', 'history,bookmarks,sessions') 134 + .option('--import-frecency', 'Import frecency scores and adaptive matching data (shorthand for --types 
...,frecency)') 125 135 .option('--tag-folders', 'Create tags from bookmark folder hierarchy (default: true)', true) 126 136 .option('--no-tag-folders', 'Skip creating tags from bookmark folder hierarchy') 127 137 .option('--dry-run', 'Show what would be imported without writing', false) 128 138 .action((profileQuery: string, options: { 129 139 db: string; 130 140 types: string; 141 + importFrecency: boolean; 131 142 dryRun: boolean; 132 143 tagFolders: boolean; 133 144 }) => { ··· 144 155 } 145 156 146 157 const importTypes = options.types.split(',').map(t => t.trim()) as ImportType[]; 158 + // --import-frecency is a shorthand to add frecency to the types list 159 + if (options.importFrecency && !importTypes.includes('frecency')) { 160 + importTypes.push('frecency'); 161 + } 147 162 const reader = getReader(profile.browser); 148 163 const dbPath = resolve(options.db); 149 164 ··· 275 290 console.log(); 276 291 } 277 292 293 + // Import frecency 294 + let frecencyUpdated = 0; 295 + let adaptiveImported = 0; 296 + if (importTypes.includes('frecency')) { 297 + console.log(chalk.cyan('Importing frecency data...')); 298 + 299 + const frecencyResult = importFrecency(profile.profilePath, profile.browser); 300 + frecencyUpdated = frecencyResult.frecencyUpdated; 301 + adaptiveImported = frecencyResult.adaptiveImported; 302 + totalErrors += frecencyResult.errors.length; 303 + 304 + console.log(chalk.green(` Updated frecency for ${frecencyResult.frecencyUpdated.toLocaleString()} items`)); 305 + if (frecencyResult.adaptiveImported > 0) { 306 + console.log(chalk.green(` Imported ${frecencyResult.adaptiveImported.toLocaleString()} adaptive matches`)); 307 + } 308 + if (frecencyResult.notFound > 0) { 309 + console.log(chalk.yellow(` Not found in DB: ${frecencyResult.notFound.toLocaleString()} URLs`)); 310 + } 311 + if (frecencyResult.errors.length > 0) { 312 + console.log(chalk.red(` Errors: ${frecencyResult.errors.length}`)); 313 + for (const err of 
frecencyResult.errors.slice(0, 5)) { 314 + console.log(chalk.red(` ${err}`)); 315 + } 316 + if (frecencyResult.errors.length > 5) { 317 + console.log(chalk.dim(` ... and ${frecencyResult.errors.length - 5} more`)); 318 + } 319 + } 320 + console.log(); 321 + } 322 + 278 323 // Summary 279 324 console.log(chalk.bold('Summary:')); 280 325 console.log(` Items imported: ${chalk.green(totalImported.toLocaleString())}`); 281 326 console.log(` Tags created: ${chalk.green(totalTags.toLocaleString())}`); 282 327 console.log(` Duplicates skipped: ${chalk.yellow(totalDuplicates.toLocaleString())}`); 328 + if (frecencyUpdated > 0) { 329 + console.log(` Frecency updated: ${chalk.green(frecencyUpdated.toLocaleString())}`); 330 + } 331 + if (adaptiveImported > 0) { 332 + console.log(` Adaptive matches: ${chalk.green(adaptiveImported.toLocaleString())}`); 333 + } 283 334 if (totalErrors > 0) { 284 335 console.log(` Errors: ${chalk.red(totalErrors.toLocaleString())}`); 285 336 } ··· 365 416 const flagStr = flags ? ` (${flags})` : ''; 366 417 console.log(chalk.dim(` ${tab.title || tab.url}${flagStr}`)); 367 418 } 419 + } 420 + } 421 + 422 + if (importTypes.includes('frecency')) { 423 + const stats = inspectFrecency(profile.profilePath, profile.browser); 424 + console.log(` Frecency: ${chalk.white(stats.frecencyCount.toLocaleString())} scored URLs would update frecency scores`); 425 + if (stats.adaptiveCount > 0) { 426 + console.log(` Adaptive Matching: ${chalk.white(stats.adaptiveCount.toLocaleString())} input associations would be imported as metadata`); 368 427 } 369 428 } 370 429 }
+8 -1
tools/browser-import/src/types.ts
··· 65 65 errors: string[]; 66 66 } 67 67 68 - export type ImportType = 'history' | 'bookmarks' | 'sessions'; 68 + /** Kinds of data an import run can be asked to process; 'frecency' is the value added by this change. */ export type ImportType = 'history' | 'bookmarks' | 'sessions' | 'frecency'; 69 + 70 + /** Counters and error messages describing one frecency import run. */ export interface FrecencyImportResult { 71 + /** Number of existing items whose frecency score was updated. */ frecencyUpdated: number; 72 + /** Number of items that received adaptive-match metadata (counted once per matched URL, not per input association). */ adaptiveImported: number; 73 + /** Number of scored URLs that had no matching item in the database. */ notFound: number; 74 + /** Human-readable messages for failures encountered during the run. */ errors: string[]; 75 + }