experiments in a post-browser web
10
fork

Configure Feed

Select the types of activity you want to include in your feed.

refactor(entities): collapse into single resident tile

Same pattern as the websearch merge (commit 2426191a) and the
window-open registration fix (commit 34e89716). Entities had two
tile entries — background (type "background", lazy: false) and home
(type "window") — that communicated via pubsub round-trips.

Audit findings during the merge: the entities feature's internal
background/home boundary was a historical split, not a semantic one.
home.js was already importing from background.js directly for most
calls. The
pubsub topics (entities:extract, entities:search, entities:get-for-url
and their response pairs) are an EXTERNAL API used by
app/page/page.js to request on-demand extraction, not internal
bg-to-home coordination. Those external topics are preserved.

Changes:
- manifest.json: two tile entries collapsed to one resident:true tile,
added missing subscribe topics (item:deleted, editor:changed,
sync:pull-completed), removed self scope.
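- home.js: merged ~280 lines of background.js logic (extraction
pipeline, command registration, external pubsub handlers) into
the existing UI module. A top-level async IIFE (placed after the UI
code) awaits api.initialize(), runs initBackground(), registers the
shutdown handler via api.onShutdown, then calls initUi once the DOM is
ready (on DOMContentLoaded, or immediately if the document has already
finished loading).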
- home.html: added connect-src and worker-src to CSP (background.html
had these, home did not).
- background.js and background.html left on disk, unreferenced.

+444 -22
+1 -1
features/entities/home.html
··· 2 2 <html> 3 3 <head> 4 4 <meta charset="utf-8"> 5 - <meta http-equiv="Content-Security-Policy" content="script-src 'self' 'unsafe-inline';"> 5 + <meta http-equiv="Content-Security-Policy" content="script-src 'self' 'unsafe-inline'; connect-src https: http:; worker-src blob:;"> 6 6 <meta name="viewport" content="width=device-width,user-scalable=no,initial-scale=1"> 7 7 <title>Entities</title> 8 8 <link rel="stylesheet" type="text/css" href="home.css">
+433 -6
features/entities/home.js
··· 1 1 /** 2 - * Entities Browser UI 2 + * Entities Tile 3 3 * 4 - * Card grid showing all extracted entities with type filtering and search. 5 - * Includes detail pane showing full entity metadata and all source pages. 4 + * Single-tile (resident: true) combining entity extraction/state and UI: 5 + * - Automatically extracts entities from visited pages 6 + * - Handles external pubsub requests (entities:extract, entities:search, entities:get-for-url) 7 + * - Card grid showing all extracted entities with type filtering and search 8 + * - Includes detail pane showing full entity metadata and all source pages 6 9 */ 7 10 8 - import { getEntities, getObservations, extractCurrentPage, setEntityFeedback } from './background.js'; 11 + import { extractRegexEntities } from './extractors/regex.js'; 12 + import { extractMicroformatEntities } from './extractors/microformats.js'; 13 + import { extractStructuredDataEntities, extractPageMetadata } from './extractors/structured-data.js'; 14 + import { processEntities } from './entity-matcher.js'; 15 + import { getEntities, getObservations, setEntityFeedback } from './entity-store.js'; 16 + import { ensureNameDatabase } from './name-validator.js'; 17 + import { registerNoun, unregisterNoun } from 'peek://ext/cmd/nouns.js'; 9 18 10 19 const api = window.app; 20 + 21 + // ==================== Background State (inlined from background.js) ==================== 22 + 23 + // Configuration 24 + const REEXTRACT_COOLDOWN_MS = 60 * 60 * 1000; // 1 hour 25 + const EXTRACTION_DELAY_MS = 2000; // Wait for page to finish rendering 26 + const CONFIDENCE_THRESHOLD = 0.7; 27 + 28 + // In-memory cache of recently extracted URLs 29 + const extractionCache = new Map(); 30 + 31 + // Pending extraction timers 32 + const pendingExtractions = new Map(); 33 + 34 + /** 35 + * Extract page metadata by requesting content from the main process. 
36 + */ 37 + async function getPageContent(url) { 38 + try { 39 + const result = await api.datastore.extractPageContent(url); 40 + if (result && result.success) { 41 + console.log(`[entities] getPageContent via IPC for ${url}: html=${result.data.html?.length || 0} chars, text=${result.data.text?.length || 0} chars`); 42 + return result.data; 43 + } 44 + console.log(`[entities] IPC extractPageContent failed for ${url}: ${result?.error || 'no result'}, falling back to fetch`); 45 + } catch (err) { 46 + console.log(`[entities] IPC extractPageContent error for ${url}: ${err.message}, falling back to fetch`); 47 + } 48 + 49 + try { 50 + const response = await fetch(url); 51 + if (!response.ok) { 52 + console.log(`[entities] fetch fallback failed for ${url}: HTTP ${response.status}`); 53 + return null; 54 + } 55 + const html = await response.text(); 56 + console.log(`[entities] getPageContent via fetch for ${url}: html=${html.length} chars, hasJsonLd=${html.includes('application/ld+json')}`); 57 + const textContent = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '') 58 + .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '') 59 + .replace(/<[^>]+>/g, ' ') 60 + .replace(/\s+/g, ' ') 61 + .trim(); 62 + return { 63 + html, 64 + text: textContent.slice(0, 50000), 65 + title: '' 66 + }; 67 + } catch (err) { 68 + console.log(`[entities] fetch fallback error for ${url}: ${err.message}`); 69 + return null; 70 + } 71 + } 72 + 73 + async function enrichUrlItem(url, pageMetadata) { 74 + try { 75 + const items = await api.datastore.queryItems({ type: 'url', search: url, limit: 5 }); 76 + if (!items.success || !items.data) return; 77 + 78 + const urlVariants = new Set([url]); 79 + try { 80 + const parsed = new URL(url); 81 + urlVariants.add(parsed.toString()); 82 + if (parsed.pathname === '/') { 83 + const withSlash = parsed.origin + '/' + parsed.search + parsed.hash; 84 + const withoutSlash = parsed.origin + parsed.search + parsed.hash; 85 + urlVariants.add(withSlash); 86 + 
urlVariants.add(withoutSlash); 87 + } 88 + } catch { /* invalid URL */ } 89 + 90 + const item = items.data.find(i => urlVariants.has(i.content)); 91 + if (!item) return; 92 + 93 + if (pageMetadata.title && (!item.title || item.title === 'Loading...')) { 94 + await api.datastore.updateItemTitle(url, pageMetadata.title); 95 + } 96 + 97 + if (pageMetadata.favicon) { 98 + await api.datastore.updateItemFavicon(url, pageMetadata.favicon); 99 + } 100 + 101 + const existingMeta = item.metadata ? JSON.parse(item.metadata) : {}; 102 + if (existingMeta._og) return; 103 + 104 + const enrichedMeta = { 105 + ...existingMeta, 106 + _og: { 107 + title: pageMetadata.title || '', 108 + description: pageMetadata.description || '', 109 + image: pageMetadata.ogImage || '', 110 + siteName: pageMetadata.siteName || '', 111 + type: pageMetadata.ogType || '', 112 + extractedAt: Date.now() 113 + } 114 + }; 115 + 116 + await api.datastore.updateItem(item.id, { metadata: JSON.stringify(enrichedMeta) }); 117 + } catch (err) { 118 + console.warn('[entities] Failed to enrich URL item:', err.message); 119 + } 120 + } 121 + 122 + async function extractEntities(url, title = '') { 123 + const content = await getPageContent(url); 124 + if (!content) return []; 125 + 126 + const html = content.html || ''; 127 + const text = content.text || ''; 128 + const pageTitle = title || content.title || ''; 129 + 130 + try { 131 + const pageMetadata = extractPageMetadata(html, url); 132 + if (pageMetadata) { 133 + await enrichUrlItem(url, pageMetadata); 134 + } 135 + } catch (err) { 136 + console.warn('[entities] Page metadata enrichment failed:', err.message); 137 + } 138 + 139 + const safeExtract = (fn, name) => { 140 + try { 141 + const result = fn(); 142 + return Array.isArray(result) ? 
result : []; 143 + } catch (err) { 144 + console.warn(`[entities] ${name} extractor threw:`, err.message); 145 + return []; 146 + } 147 + }; 148 + 149 + const [regexEntities, microformatEntities, structuredDataEntities] = await Promise.all([ 150 + Promise.resolve(safeExtract(() => extractRegexEntities(text, url), 'regex')), 151 + Promise.resolve(safeExtract(() => extractMicroformatEntities(html, url), 'microformats')), 152 + Promise.resolve(safeExtract(() => extractStructuredDataEntities(html, url), 'structured-data')), 153 + ]); 154 + 155 + console.log(`[entities] Extraction results for ${url}: structured-data=${structuredDataEntities.length}, microformats=${microformatEntities.length}, regex=${regexEntities.length}, html.length=${html.length}, hasJsonLd=${html.includes('application/ld+json')}`); 156 + 157 + const allRaw = [ 158 + ...structuredDataEntities, 159 + ...microformatEntities, 160 + ...regexEntities, 161 + ]; 162 + 163 + if (allRaw.length === 0) return []; 164 + 165 + const processed = await processEntities(allRaw, { 166 + url, 167 + title: pageTitle 168 + }, CONFIDENCE_THRESHOLD); 169 + 170 + return processed; 171 + } 172 + 173 + function handlePageLoad(msg) { 174 + const url = msg.content || msg.url; 175 + if (!url || typeof url !== 'string') return; 176 + 177 + if (url.startsWith('peek://') || url.startsWith('about:') || url.startsWith('chrome://')) return; 178 + if (!url.startsWith('http://') && !url.startsWith('https://')) return; 179 + 180 + const lastExtracted = extractionCache.get(url); 181 + if (lastExtracted && Date.now() - lastExtracted < REEXTRACT_COOLDOWN_MS) { 182 + return; 183 + } 184 + 185 + if (pendingExtractions.has(url)) { 186 + clearTimeout(pendingExtractions.get(url)); 187 + } 188 + 189 + const timer = setTimeout(async () => { 190 + pendingExtractions.delete(url); 191 + 192 + try { 193 + const entities = await extractEntities(url, msg.title || ''); 194 + extractionCache.set(url, Date.now()); 195 + 196 + if (entities.length > 0) { 197 
+ api.pubsub.publish('entities:extracted', { 198 + url, 199 + entities: entities.map(e => ({ 200 + id: e.id, 201 + name: e.name, 202 + type: e.entityType, 203 + confidence: e.confidence, 204 + isNew: e.isNew 205 + })) 206 + }, api.scopes.GLOBAL); 207 + 208 + try { 209 + await api.context.set('entities', entities.map(e => ({ 210 + id: e.id, 211 + name: e.name, 212 + type: e.entityType, 213 + confidence: e.confidence 214 + })), { 215 + metadata: { url, extractedAt: Date.now() } 216 + }); 217 + } catch (err) { 218 + console.warn('[entities] Context update failed:', err.message); 219 + } 220 + } 221 + } catch (err) { 222 + console.error('[entities] Extraction failed for:', url, err); 223 + } 224 + }, EXTRACTION_DELAY_MS); 225 + 226 + pendingExtractions.set(url, timer); 227 + } 228 + 229 + async function extractCurrentPage() { 230 + const windowList = await api.window.list({ includeInternal: false }); 231 + if (!windowList.success || !windowList.windows.length) return; 232 + 233 + let activeWindow = windowList.windows[0]; 234 + const focusedId = await api.window.getFocusedVisibleWindowId(); 235 + if (focusedId) { 236 + const focused = windowList.windows.find(w => w.id === focusedId); 237 + if (focused) activeWindow = focused; 238 + } 239 + const url = activeWindow.url; 240 + 241 + if (!url || !url.startsWith('http')) return; 242 + 243 + extractionCache.delete(url); 244 + 245 + const entities = await extractEntities(url, activeWindow.title || ''); 246 + 247 + api.pubsub.publish('entities:extracted', { 248 + url, 249 + entities: (entities || []).map(e => ({ 250 + id: e.id, 251 + name: e.name, 252 + type: e.entityType, 253 + confidence: e.confidence, 254 + isNew: e.isNew 255 + })) 256 + }, api.scopes.GLOBAL); 257 + 258 + return entities; 259 + } 260 + 261 + function openEntityBrowser() { 262 + api.window.showSelf(); 263 + } 264 + 265 + function registerCommands() { 266 + registerNoun({ 267 + name: 'entities', 268 + singular: 'entity', 269 + description: 'Extracted people, 
places, organizations, and more', 270 + skipBare: true, 271 + 272 + query: async ({ search }) => { 273 + const entities = await getEntities({ 274 + search: search || undefined, 275 + limit: 50 276 + }); 277 + if (entities.length === 0) { 278 + return { output: 'No entities found.', mimeType: 'text/plain' }; 279 + } 280 + return { 281 + success: true, 282 + output: { 283 + data: entities.map(e => { 284 + let meta = {}; 285 + try { meta = JSON.parse(e.metadata || '{}'); } catch {} 286 + return { 287 + id: e.id, 288 + name: e.content, 289 + type: meta.entityType, 290 + confidence: meta.confidence 291 + }; 292 + }), 293 + mimeType: 'application/json', 294 + title: `Entities (${entities.length})` 295 + } 296 + }; 297 + }, 298 + 299 + browse: async () => { openEntityBrowser(); }, 300 + produces: 'application/json' 301 + }); 302 + 303 + api.commands.register({ 304 + name: 'extract entities', 305 + description: 'Extract entities from current page', 306 + execute: async () => { 307 + await extractCurrentPage(); 308 + } 309 + }); 310 + } 311 + 312 + function unregisterCommands() { 313 + unregisterNoun('entities'); 314 + api.commands.unregister('extract entities'); 315 + } 316 + 317 + function initBackground() { 318 + ensureNameDatabase(); 319 + 320 + registerCommands(); 321 + 322 + api.pubsub.subscribe('item:created', (msg) => { 323 + if (msg.itemType === 'url') { 324 + handlePageLoad(msg); 325 + } 326 + }, api.scopes.GLOBAL); 327 + 328 + api.pubsub.subscribe('page:content-ready', (msg) => { 329 + if (msg.url && msg.url.startsWith('http')) { 330 + handlePageLoad({ content: msg.url, title: msg.title || '' }); 331 + } 332 + }, api.scopes.GLOBAL); 333 + 334 + // External API: manual extraction request (used by page widget in app/page/page.js) 335 + api.pubsub.subscribe('entities:extract', async (msg) => { 336 + if (msg.url) { 337 + let entities = []; 338 + try { 339 + extractionCache.delete(msg.url); 340 + entities = await extractEntities(msg.url, msg.title || '') || []; 341 + 
} catch (err) { 342 + console.error('[entities] entities:extract handler failed:', err); 343 + } 344 + 345 + api.pubsub.publish('entities:extracted', { 346 + url: msg.url, 347 + entities: entities.map(e => ({ 348 + id: e.id, 349 + name: e.name, 350 + type: e.entityType, 351 + confidence: e.confidence, 352 + isNew: e.isNew 353 + })) 354 + }, api.scopes.GLOBAL); 355 + 356 + api.pubsub.publish('entities:extract:response', { 357 + url: msg.url, 358 + entities 359 + }, api.scopes.GLOBAL); 360 + } 361 + }, api.scopes.GLOBAL); 362 + 363 + // External API: entity search (used by external consumers) 364 + api.pubsub.subscribe('entities:search', async (msg) => { 365 + const entities = await getEntities({ 366 + entityType: msg.type, 367 + search: msg.query, 368 + limit: msg.limit || 50 369 + }); 370 + api.pubsub.publish('entities:search:response', { 371 + query: msg.query, 372 + entities 373 + }, api.scopes.GLOBAL); 374 + }, api.scopes.GLOBAL); 375 + 376 + // External API: get entities for URL (used by page widget in app/page/page.js) 377 + api.pubsub.subscribe('entities:get-for-url', async (msg) => { 378 + if (!msg.url) return; 379 + const events = await api.datastore.queryItemEvents({ 380 + limit: 100, 381 + order: 'desc' 382 + }); 383 + if (events.success) { 384 + const entityIds = new Set(); 385 + for (const evt of events.data) { 386 + if (evt.content === msg.url) { 387 + entityIds.add(evt.itemId); 388 + } 389 + } 390 + const entities = []; 391 + for (const id of entityIds) { 392 + const item = await api.datastore.getItem(id); 393 + if (item.success && item.data && item.data.type === 'entity') { 394 + entities.push(item.data); 395 + } 396 + } 397 + api.pubsub.publish('entities:get-for-url:response', { 398 + url: msg.url, 399 + entities 400 + }, api.scopes.GLOBAL); 401 + } 402 + }, api.scopes.GLOBAL); 403 + } 404 + 405 + function uninitBackground() { 406 + for (const timer of pendingExtractions.values()) { 407 + clearTimeout(timer); 408 + } 409 + 
pendingExtractions.clear(); 410 + unregisterCommands(); 411 + } 412 + 413 + // ==================== View State ==================== 11 414 12 415 // View states 13 416 const VIEW_LIST = 'list'; ··· 654 1057 655 1058 // ==================== Init ==================== 656 1059 657 - async function init() { 1060 + async function initUi() { 658 1061 await loadViewPrefs(); 659 1062 setupToolbar(); 660 1063 await renderEntities(); 661 1064 } 662 1065 663 - init(); 1066 + // ===== Top-level init (runs as module in home.html) ===== 1067 + 1068 + (async () => { 1069 + console.log('[ext:entities] home.js loaded'); 1070 + 1071 + // Initialize tile — validates capability token with main process 1072 + console.log('[ext:entities] initializing v2 tile'); 1073 + await api.initialize(); 1074 + 1075 + // Initialize background logic (extraction, commands, pubsub handlers) 1076 + initBackground(); 1077 + 1078 + // Register shutdown handler 1079 + api.onShutdown(() => { 1080 + console.log('[ext:entities] received shutdown'); 1081 + uninitBackground(); 1082 + }); 1083 + 1084 + // Initialize UI once DOM is ready 1085 + if (document.readyState === 'loading') { 1086 + document.addEventListener('DOMContentLoaded', initUi); 1087 + } else { 1088 + initUi(); 1089 + } 1090 + })();
+10 -15
features/entities/manifest.json
··· 8 8 "builtin": true, 9 9 "tiles": [ 10 10 { 11 - "id": "background", 12 - "type": "background", 13 - "url": "background.html", 14 - "lazy": false 15 - }, 16 - { 17 11 "id": "home", 18 - "type": "window", 19 12 "url": "home.html", 20 - "windowHints": { 21 - "role": "workspace", 22 - "key": "entities-browser", 23 - "width": 900, 24 - "height": 700, 25 - "title": "Entities" 26 - } 13 + "width": 900, 14 + "height": 700, 15 + "title": "Entities", 16 + "role": "workspace", 17 + "key": "entities-browser", 18 + "resident": true 27 19 } 28 20 ], 29 21 "capabilities": { 30 22 "pubsub": { 31 - "scopes": ["self", "global", "system"], 23 + "scopes": ["global", "system"], 32 24 "topics": [ 33 25 "ext:ready", 34 26 "ext:entities:shutdown", 35 27 "app:shutdown", 36 28 "item:created", 29 + "item:deleted", 37 30 "page:content-ready", 31 + "editor:changed", 32 + "sync:pull-completed", 38 33 "entities:extract", 39 34 "entities:extract:response", 40 35 "entities:extracted",