···1010import fs from 'node:fs';
1111import { pathToFileURL } from 'node:url';
12121313-import { initDatabase, closeDatabase, getDb, trackWindowLoad, updateItemTitle, updateModeForNavigation } from './datastore.js';
1313+import { initDatabase, closeDatabase, getDb, trackWindowLoad, updateItemTitle, updateModeForNavigation, getContextEntry } from './datastore.js';
1414import { registerScheme, initProtocol, registerExtensionPath, getExtensionPath, getRegisteredExtensionIds, registerThemePath, getRegisteredThemeIds } from './protocol.js';
1515import { discoverExtensions, loadExtensionManifest, isBuiltinExtensionEnabled, getExternalExtensions } from './extensions.js';
1616import { initTray } from './tray.js';
···7272 source: string;
7373 params: Record<string, unknown>;
7474}>();
// Recently closed windows stack (in-memory, most recent last)
const MAX_CLOSED_WINDOWS = 20;

/**
 * Snapshot of a closed content window, recorded so the window can be reopened.
 *
 * Exported because the exported stack accessors (`pushClosedWindow`,
 * `popClosedWindow`) use it in their signatures.
 */
export interface ClosedWindowEntry {
  /** Original URL (not the peek:// rewritten one) */
  url: string;
  /** The `source` recorded for the window in the window registry */
  source: string;
  /** Bounds captured in the window's `close` handler, or null if none were captured */
  bounds: { x: number; y: number; width: number; height: number } | null;
  /** Group-mode context of the window when it closed, or null if it was not in group mode */
  groupMode: { groupId: string; groupName: string; color?: string } | null;
  /** `Date.now()` at the moment the entry was recorded */
  timestamp: number;
}

const closedWindowStack: ClosedWindowEntry[] = [];
75887689/**
7790 * Initialize the application configuration
···182195 app.on('browser-window-created', (_, window) => {
183196 const windowId = window.id;
184197198198+ // Capture window bounds before the window is destroyed (for reopen-last-closed)
199199+ let lastBounds: { x: number; y: number; width: number; height: number } | null = null;
200200+ window.on('close', () => {
201201+ try {
202202+ if (!window.isDestroyed()) {
203203+ lastBounds = window.getBounds();
204204+ }
205205+ } catch {
206206+ // Ignore errors during shutdown
207207+ }
208208+ });
209209+185210 // Handle window close
186211 // Wrapped in try-catch to prevent errors during shutdown from stalling the quit sequence.
187212 // During app.quit(), windows close concurrently and some may already be destroyed.
···226251 id: windowId,
227252 source: windowData.source
228253 });
254254+255255+ // Save to closed window stack for reopen-last-closed
256256+ // Only save user-facing content windows with web URLs (not background, modals, palettes, etc.)
257257+ const address = windowData.params.address as string | undefined;
258258+ const role = windowData.params.role as string | undefined;
259259+ const isKeepLive = windowData.params.keepLive === true;
260260+ const isModal = windowData.params.modal === true;
261261+ const isWebUrl = address && (address.startsWith('http://') || address.startsWith('https://'));
262262+ const isContentRole = !role || role === 'content' || role === 'child-content' || role === 'workspace';
263263+264264+ if (isWebUrl && isContentRole && !isKeepLive && !isModal) {
265265+ // Check for group mode context
266266+ let groupMode: ClosedWindowEntry['groupMode'] = null;
267267+ try {
268268+ const modeEntry = getContextEntry('mode', windowId);
269269+ if (modeEntry && modeEntry.value === 'group' && modeEntry.metadata) {
270270+ groupMode = {
271271+ groupId: modeEntry.metadata.groupId as string,
272272+ groupName: modeEntry.metadata.groupName as string,
273273+ color: modeEntry.metadata.color as string | undefined,
274274+ };
275275+ }
276276+ } catch {
277277+ // Context may be cleaned up already during shutdown
278278+ }
279279+280280+ pushClosedWindow({
281281+ url: address,
282282+ source: windowData.source,
283283+ bounds: lastBounds,
284284+ groupMode,
285285+ timestamp: Date.now(),
286286+ });
287287+ }
229288 }
230289231290 windowRegistry.delete(windowId);
···937996 */
export function getAllWindows(): Array<[number, { source: string; params: Record<string, unknown> }]> {
  // Snapshot the registry as [windowId, data] pairs so callers do not iterate the live Map.
  return Array.from(windowRegistry.entries());
}
/**
 * Push a closed window entry onto the stack.
 *
 * The stack keeps the most recent entry last. Once it grows past
 * MAX_CLOSED_WINDOWS the oldest entry (front of the array) is dropped.
 *
 * @param entry - Snapshot of the window that was just closed
 */
export function pushClosedWindow(entry: ClosedWindowEntry): void {
  closedWindowStack.push(entry);
  if (closedWindowStack.length > MAX_CLOSED_WINDOWS) {
    closedWindowStack.shift();
  }
  DEBUG && console.log('[reopen] Pushed closed window:', entry.url, 'stack size:', closedWindowStack.length);
}
/**
 * Pop the most recently closed window from the stack.
 *
 * @returns The most recently pushed entry (removed from the stack), or
 *   `undefined` when the stack is empty
 */
export function popClosedWindow(): ClosedWindowEntry | undefined {
  const entry = closedWindowStack.pop();
  DEBUG && console.log('[reopen] Popped closed window:', entry?.url, 'stack size:', closedWindowStack.length);
  return entry;
}
/**
 * Get the number of closed windows in the stack.
 *
 * @returns Current number of entries available to `popClosedWindow`
 */
export function getClosedWindowCount(): number {
  return closedWindowStack.length;
}
94110279421028/**
+99
backend/electron/oauth-loopback.ts
···11+/**
22+ * Generic OAuth loopback HTTP server for handling OAuth callbacks.
33+ *
44+ * Starts a temporary HTTP server on 127.0.0.1 with an OS-assigned port.
55+ * Returns the port so the caller can build a redirect_uri, then waits
66+ * for the OAuth callback GET request and returns the query parameters.
77+ *
88+ * Reusable by any extension or future OAuth flow.
99+ */
1010+1111+import * as http from 'node:http';
// Static page served to the browser once the OAuth callback request has been received.
const SUCCESS_HTML = `<!DOCTYPE html>
<html><head><meta charset="utf-8"><title>Authorization Complete</title>
<style>body{font-family:system-ui;display:flex;align-items:center;justify-content:center;height:100vh;margin:0;background:#1a1a1a;color:#e0e0e0}
.card{text-align:center;padding:2rem}.check{font-size:3rem;margin-bottom:1rem}</style></head>
<body><div class="card"><div class="check">✓</div><p>Authorization complete. You can close this window.</p></div></body></html>`;
/**
 * Handle returned by `createLoopbackServer`.
 *
 * Exported so callers of the exported factory can name its result type.
 */
export interface LoopbackServer {
  /** OS-assigned port the server is listening on (use it to build the redirect_uri) */
  port: number;
  /** Resolves with the callback's query parameters; rejects on timeout or cancel */
  waitForCallback: () => Promise<{ params: Record<string, string> }>;
  /** Abort the flow: rejects the pending callback and shuts the server down */
  cancel: () => void;
}
/**
 * Start a temporary OAuth loopback server on 127.0.0.1 with an OS-assigned port.
 *
 * The returned promise resolves once the server is listening. The flow then ends
 * in exactly one of these ways, after which the server is closed and the timer cleared:
 *   - a GET to `callbackPath` arrives  -> `waitForCallback()` resolves with its query params
 *   - `timeoutMs` elapses              -> `waitForCallback()` rejects ('OAuth callback timed out')
 *   - `cancel()` is called             -> `waitForCallback()` rejects ('OAuth flow cancelled')
 *   - the server emits `error`         -> `waitForCallback()` rejects with that error
 *
 * @param opts.callbackPath - Path that counts as the OAuth callback (default '/callback')
 * @param opts.timeoutMs - How long to wait for the callback (default 120000 ms)
 * @returns Handle exposing the bound port, the callback promise and a cancel function
 * @throws (rejects) if the server cannot bind or fails before it is listening
 */
export function createLoopbackServer(opts?: {
  callbackPath?: string;
  timeoutMs?: number;
}): Promise<LoopbackServer> {
  const callbackPath = opts?.callbackPath ?? '/callback';
  const timeoutMs = opts?.timeoutMs ?? 120_000;

  return new Promise<LoopbackServer>((resolveOuter, rejectOuter) => {
    let settled = false;
    let callbackResolve!: (value: { params: Record<string, string> }) => void;
    let callbackReject!: (reason: Error) => void;

    const callbackPromise = new Promise<{ params: Record<string, string> }>((res, rej) => {
      callbackResolve = res;
      callbackReject = rej;
    });
    // The flow can fail (timeout, cancel, server error) before anyone has called
    // waitForCallback(). Mark the promise as handled so that does not surface as an
    // unhandled rejection; later waiters still observe the rejection.
    void callbackPromise.catch(() => {});

    const server = http.createServer((req, res) => {
      let url: URL;
      try {
        url = new URL(req.url || '/', `http://127.0.0.1`);
      } catch {
        // Request targets such as "//" are not parseable. Any local process can hit
        // this port, so answer 400 instead of letting the exception escape the handler.
        res.writeHead(400, { 'Content-Type': 'text/plain', Connection: 'close' });
        res.end('Bad Request');
        return;
      }

      if (req.method === 'GET' && url.pathname === callbackPath) {
        const params: Record<string, string> = {};
        url.searchParams.forEach((v, k) => { params[k] = v; });

        // `Connection: close` keeps a keep-alive socket from holding the server open
        // after server.close().
        res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8', Connection: 'close' });
        res.end(SUCCESS_HTML);

        if (!settled) {
          callbackResolve({ params });
          cleanup();
        }
      } else {
        res.writeHead(404, { 'Content-Type': 'text/plain', Connection: 'close' });
        res.end('Not Found');
      }
    });

    const timeout = setTimeout(() => fail(new Error('OAuth callback timed out')), timeoutMs);

    /** Mark the flow finished and release the timer and the listening socket. */
    function cleanup(): void {
      settled = true;
      clearTimeout(timeout);
      try { server.close(); } catch { /* server may not be listening yet */ }
    }

    /**
     * End the flow with an error. Rejects the callback promise and, if the server
     * never became ready, the outer promise too (a no-op once it has resolved).
     */
    function fail(err: Error): void {
      if (settled) return;
      cleanup();
      callbackReject(err);
      rejectOuter(err);
    }

    server.on('error', fail);

    server.listen(0, '127.0.0.1', () => {
      if (settled) {
        // The flow already ended (e.g. a very short timeout) before the bind completed.
        try { server.close(); } catch { /* already closed */ }
        return;
      }

      const addr = server.address();
      if (!addr || typeof addr === 'string') {
        fail(new Error('Failed to bind loopback server'));
        return;
      }

      resolveOuter({
        port: addr.port,
        waitForCallback: () => callbackPromise,
        cancel: () => fail(new Error('OAuth flow cancelled')),
      });
    });
  });
}
+589
docs/entity-correlation-design.md
···11+# Entity Correlation System Design
22+33+**Date:** 2026-02-19
44+**Status:** Design proposal — not yet implemented
55+66+---
77+88+## 1. Current State Analysis
99+1010+### How Entities Work Today
1111+1212+Entities are stored as items in the unified `items` table with `type = 'entity'`. Each entity item has:
1313+1414+- **`content`**: The canonical name (e.g., "Elon Musk")
1515+- **`metadata`** (JSON): `entityType`, `aliases[]`, `attributes{}`, `confidence`, `extractor`, `mergedFrom[]`, `feedback{}`
1616+- **`visitCount`** / **`lastVisitAt`** / **`frecencyScore`**: Tracking how often/recently the entity appears
1717+1818+Observations (entity X was seen on page Y) are stored as `item_events` rows:
1919+2020+- **`itemId`**: Points to the entity item
2121+- **`content`**: The source page URL
2222+- **`value`**: Extraction confidence for that observation
2323+- **`occurredAt`**: Timestamp
2424+- **`metadata`** (JSON): `extractor`, `pageTitle`, `extractedText`, `context`
2525+2626+### Entity Types
2727+2828+The system recognizes these entity types: `person`, `organization`, `place`, `event`, `email`, `phone`, `date`, `product`, `creative_work`, `tracking_number`, `price`.
2929+3030+### Extractors
3131+3232+Three extractors run in parallel on each page:
1. **JSON-LD / structured data** (high confidence, 0.85-0.9): Parses `<script type="application/ld+json">`, maps schema.org types to entity types. Also extracts OG meta tags and standard meta tags (author, publisher).
2. **Microformats** (highest confidence, 0.9-0.95): Parses h-card, h-event, h-adr markup.
3636+3. **Regex** (variable confidence, 0.6-0.9): Extracts emails, phone numbers, addresses, tracking numbers from raw text.
3737+3838+### What's Missing
3939+4040+Entities are entirely flat. Each entity knows which pages it appeared on (via `item_events`), but there is no concept of:
4141+4242+- Which entities appear together on the same pages
4343+- How strongly two entities are related
4444+- Clusters of related entities
4545+- Entity identity across name variations (limited alias support exists but is only used at extraction time)
4646+4747+The raw data for co-occurrence already exists in `item_events` — we just need to derive relationships from it.
4848+4949+---
5050+5151+## 2. Co-occurrence Model
5252+5353+### Core Insight
5454+5555+Two entities are related if they appear on the same pages. The more pages they share, and the fewer total pages each appears on independently, the stronger the signal. This is essentially the same statistical reasoning behind TF-IDF, applied to entity pairs instead of document terms.
5656+5757+### Data Structure: Edge Table
5858+5959+The fundamental unit is a **weighted edge** between two entities:
```
entity_correlations:
  entityA TEXT NOT NULL -- entity item ID (lexicographically smaller)
  entityB TEXT NOT NULL -- entity item ID (lexicographically larger)
  cooccurrenceCount INTEGER -- number of pages where both appear
  npmi REAL -- normalized pointwise mutual information score (NPMI, defined below)
  lastCooccurrence INTEGER -- timestamp of most recent shared page
  firstCooccurrence INTEGER -- timestamp of first shared page
  updatedAt INTEGER -- when this edge was last recalculated
  PRIMARY KEY (entityA, entityB)
```
7272+7373+The canonical ordering (entityA < entityB lexicographically) ensures each pair is stored exactly once.
7474+7575+### Why PMI Over Raw Count
7676+7777+Raw co-occurrence count is misleading. If "Google" appears on 200 of your pages and "Chrome" appears on 150, they'll co-occur frequently just by volume. PMI (Pointwise Mutual Information) corrects for this:
7878+7979+```
8080+PMI(a, b) = log2( P(a,b) / (P(a) * P(b)) )
8181+```
8282+8383+Where:
8484+- `P(a,b)` = pages where both a and b appear / total pages with any entity
8585+- `P(a)` = pages where a appears / total pages with any entity
8686+- `P(b)` = pages where b appears / total pages with any entity
8787+8888+PMI is positive when entities co-occur more than chance, zero when independent, and negative when they avoid each other. For our purposes, we only care about positive PMI.
8989+9090+**Normalized PMI (NPMI)** bounds the score to [-1, 1]:
9191+9292+```
9393+NPMI(a, b) = PMI(a, b) / -log2(P(a,b))
9494+```
9595+9696+NPMI = 1 means perfect co-occurrence (they always appear together), NPMI = 0 means independence. This is more interpretable and comparable across entity pairs with different frequencies.
9797+9898+### Temporal Decay
9999+100100+Recent co-occurrences should matter more than old ones. Rather than decaying the PMI score itself (which would require continuous recomputation), we apply decay at query time:
101101+102102+```
103103+effective_score = NPMI * decay_factor
104104+decay_factor = 1 / (1 + days_since_last_cooccurrence / 30)
105105+```
106106+107107+This uses the same decay shape as the existing frecency calculation in the codebase (see `entity-store.js` line 103), with a 30-day half-life instead of 7 days — relationships are stickier than individual entity relevance.
108108+109109+The `lastCooccurrence` timestamp on each edge enables this without recomputing PMI.
110110+111111+---
112112+113113+## 3. Entity Resolution
114114+115115+### The Problem
116116+117117+The same real-world entity appears under different names:
118118+- "Elon Musk" / "Musk" / "@elonmusk" / "Elon R. Musk"
119119+- "United States" / "US" / "USA" / "the United States of America"
120120+- "SpaceX" / "Space Exploration Technologies Corp."
121121+122122+### Current State
123123+124124+The system already has an `aliases` array in entity metadata and `normalizeName()` (lowercase, trim, remove diacritics) for matching. But aliases are only populated when extraction finds an exact match — there's no fuzzy matching or alias discovery.
125125+126126+### Proposed Approach: Layered Resolution
127127+128128+Rather than one complex system, use three simple layers that each catch different cases:
129129+130130+#### Layer 1: Normalized Exact Match (exists today)
131131+132132+`normalizeName()` already handles case, whitespace, and diacritics. This catches "Elon Musk" vs "elon musk" vs "Elon Musk".
133133+134134+#### Layer 2: Token-Based Similarity
135135+136136+For person names, check if one name is a subset of another:
137137+138138+```javascript
/**
 * Decide whether two person names plausibly refer to the same person.
 * Relies on normalizeName() (lowercase, trim, strip diacritics) from the entity store.
 */
function isNameVariant(nameA, nameB) {
  const tokensA = normalizeName(nameA).split(' ').filter(Boolean);
  const tokensB = normalizeName(nameB).split(' ').filter(Boolean);
  if (tokensA.length === 0 || tokensB.length === 0) return false;

  // "Musk" matches "Elon Musk" (single token contained in the longer name)
  if (tokensA.length === 1 && tokensB.includes(tokensA[0])) return true;
  if (tokensB.length === 1 && tokensA.includes(tokensB[0])) return true;

  // "E. Musk" matches "Elon Musk" (initial + surname)
  if (tokensA.length >= 2 && tokensB.length >= 2) {
    const lastA = tokensA[tokensA.length - 1];
    const lastB = tokensB[tokensB.length - 1];
    if (lastA === lastB) {
      // Same surname — check if first names are compatible
      const firstA = tokensA[0];
      const firstB = tokensB[0];
      if (firstA === firstB) return true; // "Elon R. Musk" vs "Elon Musk"

      // A shared first letter only counts when one side really is an initial
      // ("e" / "e."). Otherwise "John Smith" and "Jane Smith" would be merged.
      const isInitial = (token) => token.replace(/\./g, '').length === 1;
      if ((isInitial(firstA) || isInitial(firstB)) && firstA[0] === firstB[0]) return true;
    }
  }

  return false;
}
161161+```
162162+163163+This is conservative — it won't merge "John Smith" and "Jane Smith" because we require initial match. Apply only within the same `entityType`.
164164+165165+#### Layer 3: Co-occurrence-Based Alias Detection
166166+167167+If two entities of the same type have very high NPMI (> 0.8) and one has significantly fewer observations than the other, the rarer one is likely an alias. This catches "@elonmusk" being an alias of "Elon Musk" — they'll always appear on the same pages (Twitter/X profiles).
168168+169169+Criteria for automatic merge suggestion:
170170+- Same `entityType`
171171+- NPMI > 0.8
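- One entity has < 3 observations (such a pair can never reach the 3-co-occurrence minimum required to store an edge — see section 4 — so this check has to be computed from `entity_page_map` directly rather than from stored `entity_correlations` rows)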
173173+- The rarer entity's observations are a subset of the more common entity's observation pages
174174+175175+This should be surfaced as a **suggestion** in the UI rather than automatically merged, at least initially. Users can confirm or reject merges.
176176+177177+#### What We Don't Do
178178+179179+We deliberately avoid:
180180+- **Levenshtein distance**: Too many false positives at short edit distances ("Mars" vs "Musk")
181181+- **External knowledge bases**: Wikidata validation is being researched separately (see `docs/entity-validation-research.md`) and can feed into resolution later
182182+- **ML embeddings**: Overkill for hundreds of entities; adds significant complexity
183183+184184+---
185185+186186+## 4. Signal vs. Noise
187187+188188+### Problem
189189+190190+Not all co-occurrences are meaningful. Boilerplate entities (the publisher name, the site author) co-occur with everything on that site. Generic entities ("United States", "Google") appear everywhere.
191191+192192+### Statistical Filters
193193+194194+#### Minimum Co-occurrence Threshold
195195+196196+Require at least **3 co-occurrences** before storing an edge. A single shared page is noise; two could be coincidence; three starts to be a pattern. At this scale (hundreds of pages), this is a practical cutoff.
197197+198198+```sql
199199+-- Only create/maintain edges where cooccurrenceCount >= 3
200200+```
201201+202202+#### PMI Minimum
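Only retain edges with **NPMI > 0.1**. This filters out pairs that co-occur roughly at chance level. NPMI is a normalized log-ratio, not a percentage: 0 means the pair co-occurs exactly as often as chance predicts, and 0.1 means its PMI is 10% of the maximum possible for that pair's joint frequency — i.e. only slightly more often than chance.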
205205+206206+#### Maximum Frequency Cap
207207+208208+Entities that appear on more than 30% of all entity-bearing pages are "stopword entities" — too common to be informative. Examples: the user's own name, their employer, "Google", "United States". Exclude them from correlation computation (but don't delete them as entities).
209209+210210+```javascript
211211+const FREQUENCY_CAP = 0.3; // entities on >30% of pages are excluded from correlation
212212+```
213213+214214+This is analogous to TF-IDF's inverse document frequency — the most common terms carry the least information.
215215+216216+#### Per-Site Deduplication
217217+218218+If a user reads 20 articles on nytimes.com, the NYT publisher entity will co-occur with every entity from those articles. To prevent site-level boilerplate from dominating:
219219+220220+Count co-occurrences by **unique domains**, not raw page count. Two entities co-occurring across 3 different domains is a much stronger signal than co-occurring on 3 pages from the same domain.
221221+222222+```
223223+entity_correlations:
224224+ ...
225225+ domainCount INTEGER -- number of unique domains where both appear
226226+```
227227+228228+Use `domainCount` as the primary co-occurrence signal rather than raw page count for PMI calculation.
229229+230230+---
231231+232232+## 5. Clustering
233233+234234+### Algorithm Choice: Connected Components with Threshold
235235+236236+For the scale of hundreds to low thousands of entities, sophisticated graph community detection (Louvain, Girvan-Newman) is unnecessary. A simple threshold-based approach works well:
237237+238238+1. Build an adjacency graph from edges with NPMI > 0.3
239239+2. Find connected components using BFS/DFS
240240+3. Within each component, identify the "hub" entity (highest degree, i.e., most connections)
241241+242242+This naturally produces clusters like:
243243+- {Elon Musk, Tesla, SpaceX, SEC, Fremont} — a tech/business cluster
244244+- {Biden, Congress, White House, legislation} — a politics cluster
245245+- {React, JavaScript, Node.js, npm} — a tech stack cluster
246246+247247+### Hierarchical Clustering (Future)
248248+249249+For finer-grained clustering within large components, use a simple single-linkage approach:
1. Start from the connected components found at the base threshold (NPMI > 0.3)
2. If a component has > 20 entities, re-run connected components inside it at a higher threshold (0.5, then 0.6 for tight clusters) to break it into subclusters
253253+3. Present as nested groups: "Tech" > "SpaceX ecosystem", "Tesla ecosystem"
254254+255255+This is a future enhancement — connected components at a single threshold is the MVP.
256256+257257+### Cluster Metadata
258258+259259+Each cluster gets derived metadata:
260260+261261+```javascript
262262+{
263263+ id: 'cluster_abc123',
264264+ entities: ['entity_id_1', 'entity_id_2', ...],
265265+ hub: 'entity_id_1', // most connected entity
266266+ dominantType: 'person', // most common entity type in cluster
267267+ label: 'Elon Musk', // hub entity name as default label
268268+ totalObservations: 45, // sum of entity observations
269269+ lastActivity: 1708300000000, // most recent observation timestamp
270270+ density: 0.72 // ratio of actual edges to possible edges
271271+}
272272+```
273273+274274+Clusters are ephemeral — computed on demand from the edge table, not stored permanently. They change as new browsing data arrives.
275275+276276+---
277277+278278+## 6. Schema Design
279279+280280+### New Table: `entity_correlations`
281281+282282+```sql
283283+CREATE TABLE IF NOT EXISTS entity_correlations (
284284+ entityA TEXT NOT NULL,
285285+ entityB TEXT NOT NULL,
286286+ cooccurrenceCount INTEGER NOT NULL DEFAULT 0,
287287+ domainCount INTEGER NOT NULL DEFAULT 0,
288288+ npmi REAL NOT NULL DEFAULT 0,
289289+ lastCooccurrence INTEGER NOT NULL DEFAULT 0,
290290+ firstCooccurrence INTEGER NOT NULL DEFAULT 0,
291291+ updatedAt INTEGER NOT NULL DEFAULT 0,
292292+ PRIMARY KEY (entityA, entityB),
293293+ FOREIGN KEY(entityA) REFERENCES items(id),
294294+ FOREIGN KEY(entityB) REFERENCES items(id)
295295+);
296296+297297+CREATE INDEX IF NOT EXISTS idx_entity_corr_a ON entity_correlations(entityA);
298298+CREATE INDEX IF NOT EXISTS idx_entity_corr_b ON entity_correlations(entityB);
299299+CREATE INDEX IF NOT EXISTS idx_entity_corr_npmi ON entity_correlations(npmi DESC);
300300+CREATE INDEX IF NOT EXISTS idx_entity_corr_updated ON entity_correlations(updatedAt);
301301+```
302302+303303+### New Table: `entity_page_map` (Denormalized Lookup)
304304+305305+The existing `item_events` table already stores entity-page relationships, but querying "which entities appeared on page X" requires scanning all events. A denormalized lookup table makes co-occurrence computation fast:
306306+307307+```sql
308308+CREATE TABLE IF NOT EXISTS entity_page_map (
309309+ entityId TEXT NOT NULL,
310310+ pageUrl TEXT NOT NULL,
311311+ pageDomain TEXT NOT NULL,
312312+ observedAt INTEGER NOT NULL,
313313+ PRIMARY KEY (entityId, pageUrl),
314314+ FOREIGN KEY(entityId) REFERENCES items(id)
315315+);
316316+317317+CREATE INDEX IF NOT EXISTS idx_entity_page_url ON entity_page_map(pageUrl);
318318+CREATE INDEX IF NOT EXISTS idx_entity_page_domain ON entity_page_map(pageDomain);
319319+```
320320+321321+This table is populated from `item_events` during initial setup and kept in sync as new observations arrive. It's a materialized view — the source of truth remains `item_events`.
322322+323323+### New Table: `entity_merge_suggestions`
324324+325325+```sql
326326+CREATE TABLE IF NOT EXISTS entity_merge_suggestions (
327327+ id TEXT PRIMARY KEY,
328328+ entityA TEXT NOT NULL,
329329+ entityB TEXT NOT NULL,
330330+ reason TEXT NOT NULL, -- 'name_variant', 'high_cooccurrence', 'manual'
331331+ confidence REAL NOT NULL,
332332+ status TEXT DEFAULT 'pending', -- 'pending', 'accepted', 'rejected'
333333+ createdAt INTEGER NOT NULL,
334334+ resolvedAt INTEGER DEFAULT 0,
335335+ FOREIGN KEY(entityA) REFERENCES items(id),
336336+ FOREIGN KEY(entityB) REFERENCES items(id)
337337+);
338338+```
339339+340340+### Query Patterns
341341+342342+The schema is designed around these primary queries:
343343+344344+**"Show me everything related to entity X":**
345345+```sql
346346+SELECT entityB AS relatedId, npmi, cooccurrenceCount, domainCount
347347+FROM entity_correlations
348348+WHERE entityA = ? AND npmi > 0.1
349349+UNION ALL
350350+SELECT entityA AS relatedId, npmi, cooccurrenceCount, domainCount
351351+FROM entity_correlations
352352+WHERE entityB = ? AND npmi > 0.1
353353+ORDER BY npmi DESC
354354+LIMIT 20;
355355+```
356356+357357+**"What clusters exist" (all edges above threshold):**
358358+```sql
359359+SELECT entityA, entityB, npmi
360360+FROM entity_correlations
361361+WHERE npmi > 0.3
362362+ORDER BY npmi DESC;
363363+```
364364+365365+Then compute connected components in JavaScript — the result set will be small (hundreds of edges at most).
366366+367367+**"What's trending" (recently active correlations):**
368368+```sql
369369+SELECT entityA, entityB, npmi, lastCooccurrence
370370+FROM entity_correlations
371371+WHERE lastCooccurrence > ? AND npmi > 0.2
372372+ORDER BY lastCooccurrence DESC
373373+LIMIT 50;
374374+```
375375+376376+**"Which entities co-occurred on this page":**
377377+```sql
378378+SELECT entityId FROM entity_page_map WHERE pageUrl = ?;
379379+```
380380+381381+---
382382+383383+## 7. Incremental Update Strategy
384384+385385+### Trigger: New Observation
386386+387387+When `addObservation()` records that entity X was seen on page Y:
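1. **Update `entity_page_map`**: Insert or ignore `(X, Y, domain(Y), now)`. If the row already existed (X was seen on Y before, e.g. a page revisit), skip the `cooccurrenceCount` increment in step 3 so the same page is not counted twice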
390390+2. **Find co-occurring entities**: Query `entity_page_map` for all other entities on page Y
391391+3. **Update edges**: For each co-occurring entity Z, upsert `entity_correlations(X, Z)`:
392392+ - Increment `cooccurrenceCount`
393393+ - Recalculate `domainCount` from `entity_page_map`
394394+ - Update `lastCooccurrence`
395395+ - Recalculate NPMI (requires total page count — cache this)
396396+397397+### Batch Recomputation
398398+399399+NPMI depends on global statistics (total pages, per-entity page counts) that change with every page visit. Rather than recomputing every edge on every observation, use a two-tier strategy:
400400+401401+**Tier 1 — Immediate (per observation):**
402402+- Update `cooccurrenceCount` and `lastCooccurrence` for affected edges
403403+- Use cached global stats for approximate NPMI
404404+405405+**Tier 2 — Periodic (background):**
406406+- Every 50 new observations (or on app startup), recompute NPMI for all edges
407407+- Update global stats cache
408408+- Prune edges below minimum thresholds
409409+- Generate merge suggestions from high-NPMI same-type pairs
410410+411411+The periodic recomputation is fast — at the scale of hundreds of entities, the correlation table will have at most a few thousand edges. A full recompute takes milliseconds in SQLite.
412412+413413+### Initial Bootstrap
414414+415415+On first run (or when the feature is enabled), populate `entity_page_map` from existing `item_events`:
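```sql
-- domain() is an application-defined SQL function (registered from JS) that returns
-- the hostname of a URL. SQLite has no built-in URL parsing, and a single bound
-- parameter would stamp every row with the same domain.
INSERT OR IGNORE INTO entity_page_map (entityId, pageUrl, pageDomain, observedAt)
SELECT ie.itemId, ie.content, domain(ie.content), ie.occurredAt
FROM item_events ie
JOIN items i ON ie.itemId = i.id
WHERE i.type = 'entity' AND i.deletedAt = 0;
```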
424424+425425+Then compute all pairwise co-occurrences and NPMI scores. For N entities, the worst case is N*(N-1)/2 pairs, but the minimum co-occurrence threshold of 3 will prune most of these.
426426+427427+### Where This Code Lives
428428+429429+The correlation engine should be a **backend module** (`backend/electron/entity-correlations.ts`), not an extension. Reasons:
430430+431431+- It needs direct SQLite access for efficient batch queries
432432+- It runs on the main process, not in extension renderer
433433+- The entity extension calls it via IPC
434434+435435+The extension side (`extensions/entities/`) handles UI and user interaction. The backend handles data and computation.
436436+437437+---
438438+439439+## 8. UI Concepts
440440+441441+### 8.1 Related Entities Panel (MVP)
442442+443443+The simplest useful UI: when viewing an entity's detail page (which already exists in `home.js`), show a "Related Entities" section below "Source Pages":
444444+445445+```
446446+─── Related Entities ───────────────────
447447+ Elon Musk ████████░░ 0.82 (person)
448448+ SEC ██████░░░░ 0.61 (organization)
449449+ Fremont, CA █████░░░░░ 0.54 (place)
450450+ NHTSA ████░░░░░░ 0.41 (organization)
451451+```
452452+453453+Each row is clickable, navigating to that entity's detail view. The bar represents NPMI score. This requires minimal UI changes — just a new section in the existing detail view.
454454+455455+### 8.2 Cluster View
456456+457457+A new tab/mode in the entities browser that shows clusters instead of individual entities:
458458+459459+```
460460+Clusters
461461+─────────────────────────────────
462462+┌──────────────────────┐ ┌──────────────────────┐
463463+│ Tesla Ecosystem │ │ Web Standards │
464464+│ 8 entities │ │ 5 entities │
465465+│ ─────────────────── │ │ ─────────────────── │
466466+│ Elon Musk (hub) │ │ W3C (hub) │
467467+│ Tesla, SpaceX, SEC │ │ WHATWG, IndieWeb │
468468+│ + 4 more │ │ + 2 more │
469469+│ │ │ │
470470+│ Last active: 2h ago │ │ Last active: 1d ago │
471471+└──────────────────────┘ └──────────────────────┘
472472+```
473473+474474+Clicking a cluster expands it to show all member entities and their interconnections.
475475+476476+### 8.3 Graph Visualization (Future)
477477+478478+A force-directed graph where:
479479+- Nodes are entities (sized by observation count, colored by type)
480480+- Edges are correlations (thickness proportional to NPMI)
481481+- Clusters naturally separate in the layout
482482+483483+This is the most visually compelling but also the most complex to implement. Use a lightweight library like `d3-force` (already reasonable in an Electron renderer).
484484+485485+This should be a later phase — the related entities panel and cluster view provide most of the value with far less implementation effort.
486486+487487+### 8.4 Contextual Correlation Display
488488+489489+When browsing a web page, show a subtle indicator of how the entities on the current page relate to the user's existing knowledge graph. This could be:
490490+- A sidebar showing "You've seen these entities before, and they're related to X, Y, Z"
491491+- A badge on the entities extension icon showing how many correlated entities were found
492492+493493+This is speculative and depends on the page-level entity display being implemented first.
494494+495495+---
496496+497497+## 9. Implementation Phases
498498+499499+### Phase 1: Foundation (MVP)
500500+501501+**Goal:** Build the correlation data pipeline and show basic "related entities" in the UI.
502502+503503+1. Add `entity_correlations` and `entity_page_map` tables via datastore migration
504504+2. Implement `entity-correlations.ts` backend module with:
505505+ - `updateCorrelationsForPage(pageUrl, entityIds[])` — called after extraction
506506+ - `getRelatedEntities(entityId, limit)` — query for UI
507507+ - `recomputeAllCorrelations()` — batch recompute
508508+3. Bootstrap from existing `item_events` data
509509+4. Add "Related Entities" section to entity detail view in `home.js`
510510+5. Wire extraction pipeline to call `updateCorrelationsForPage` after `processEntities`
511511+512512+**Estimated scope:** ~500 lines of backend code, ~100 lines of UI additions.
513513+514514+### Phase 2: Signal Quality
515515+516516+**Goal:** Improve correlation quality with noise filtering and domain-aware scoring.
517517+518518+1. Implement domain-based co-occurrence counting
519519+2. Add frequency cap filtering (>30% entities excluded)
520520+3. Periodic NPMI recomputation on app startup
521521+4. Prune stale/weak edges (NPMI < threshold and no co-occurrence in 90 days)
522522+523523+### Phase 3: Entity Resolution
524524+525525+**Goal:** Detect and suggest entity merges.
526526+527527+1. Implement token-based name variant detection (Layer 2)
528528+2. Implement co-occurrence-based alias detection (Layer 3)
529529+3. Add `entity_merge_suggestions` table and UI for accepting/rejecting
530530+4. Merge operation: combine observations, update aliases, redirect correlations
531531+532532+### Phase 4: Clustering & Visualization
533533+534534+**Goal:** Surface higher-level structure.
535535+536536+1. Implement connected-component clustering
537537+2. Add cluster view to entities browser
538538+3. Cluster metadata computation (hub, label, density)
539539+4. Optional: force-directed graph visualization with d3-force
540540+541541+### Phase 5: Contextual Integration
542542+543543+**Goal:** Surface correlations during browsing.
544544+545545+1. After entity extraction, check how page entities relate to existing clusters
546546+2. Show contextual "related to your interests" indicators
547547+3. Integration with groups/workspaces — suggest grouping related pages by entity cluster
548548+549549+---
550550+551551+## 10. Performance Considerations
552552+553553+### Scale Estimates
554554+555555+For a personal browser:
556556+- **Entities:** 100-2,000 (after months of use)
557557+- **Pages with entities:** 200-5,000
558558+- **Entity-page pairs:** 500-20,000
559559+- **Correlation edges:** 200-5,000 (after threshold filtering)
560560+- **Clusters:** 10-100
561561+562562+### SQLite Performance
563563+564564+All operations are well within SQLite's comfort zone:
565565+566566+- **Pairwise co-occurrence computation**: For 1,000 entities across 2,000 pages, the entity_page_map has ~10,000 rows. Finding all pairs for a single page (say 5 entities = 10 pairs) is a simple index lookup. The full batch recompute touches at most a few thousand edges.
567567+568568+- **NPMI calculation**: Pure arithmetic on cached counts. Sub-millisecond per edge.
569569+570570+- **Cluster computation**: Loading all edges above threshold (a few hundred rows) into memory and running BFS takes microseconds.
571571+572572+### Memory
573573+574574+The correlation engine maintains in memory:
575575+- Global page count (one integer)
576576+- Per-entity page count cache (Map of ~1,000 entries)
577577+- Nothing else — everything else is queried from SQLite on demand
578578+579579+---
580580+581581+## 11. Open Questions
582582+583583+1. **Should correlations cross entity types?** A person and an organization co-occurring is interesting ("Elon Musk" + "Tesla"). But should a person and a phone number or email address be correlated? Probably yes — if a phone number or email address always appears with a person's name, that's a useful signal. Start with cross-type correlations and filter in the UI if needed.
584584+585585+2. **How to handle entity deletion?** When a user thumbs-down an entity (existing feature), should its correlations be removed? Probably yes — cascade the deletion to `entity_correlations` and `entity_page_map`. This keeps the graph clean.
586586+587587+3. **Extension vs. backend boundary:** The correlation engine does heavy SQL work and should live in the backend. But the entity extraction pipeline lives in the extension. The cleanest boundary: the extension publishes `entities:extracted` events (already happens), and the backend subscribes to compute correlations. The extension queries correlations via IPC for display.
588588+589589+4. **Privacy implications:** The correlation graph is a dense summary of browsing interests. It should follow the same data storage and sync policies as other Peek data. No new privacy concerns beyond what entity extraction already introduces.
+689
extensions/me-core/atproto.js
···11+/**
22+ * AT Protocol API helpers for the Me extension.
33+ *
44+ * Handles authentication via OAuth (PKCE + DPoP) using the generic
55+ * loopback server, and authenticated XRPC requests against the user's PDS.
66+ */
// Feature detection.
// `typeof window.app` on its own throws a ReferenceError when `window` is not
// defined at all (workers, Node-based tests), so probe `window` first.
const hasPeekAPI = typeof window !== 'undefined' && typeof window.app !== 'undefined';
// Handle to the Peek host API, or null when running outside the Peek shell.
const api = hasPeekAPI ? window.app : null;

// ============================================================================
// Public API (unauthenticated)
// ============================================================================

// Base URL used for all unauthenticated XRPC lookups in this module.
const PUBLIC_API = 'https://public.api.bsky.app';
/**
 * Typeahead actor search against the public Bluesky API (no auth needed).
 *
 * Queries shorter than two characters, non-OK responses, and any network or
 * parse failure all produce an empty list instead of an error.
 *
 * @param {string} query - Search query
 * @returns {Promise<Array<{handle: string, did: string|null, displayName: string|null, avatar: string|null}>>}
 */
export async function searchActors(query) {
  if (!query || query.length < 2) return [];

  const endpoint = `${PUBLIC_API}/xrpc/app.bsky.actor.searchActorsTypeahead?q=${encodeURIComponent(query)}&limit=8`;

  // Keep only the fields the UI needs; missing optional fields become null.
  const toSummary = (actor) => ({
    handle: actor.handle,
    did: actor.did || null,
    displayName: actor.displayName || null,
    avatar: actor.avatar || null,
  });

  try {
    const response = await fetch(endpoint);
    if (!response.ok) return [];
    const payload = await response.json();
    const actors = payload.actors || [];
    return actors.map((actor) => toSummary(actor));
  } catch {
    return [];
  }
}
/**
 * Look up the DID for a handle through the public API.
 *
 * Never throws: a non-OK response, a missing `did` field, or any
 * network/parse error resolves to null.
 *
 * @param {string} handle
 * @returns {Promise<string|null>} DID or null
 */
export async function resolveHandle(handle) {
  try {
    const endpoint = `${PUBLIC_API}/xrpc/com.atproto.identity.resolveHandle?handle=${encodeURIComponent(handle)}`;
    const response = await fetch(endpoint);
    if (response.ok) {
      const payload = await response.json();
      return payload.did || null;
    }
    return null;
  } catch {
    return null;
  }
}
/**
 * Fetch an actor's profile record from the public API.
 *
 * Never throws: a non-OK response or any network/parse error resolves to null.
 *
 * @param {string} actor - DID or handle
 * @returns {Promise<Object|null>} Parsed profile JSON, or null
 */
export async function getProfile(actor) {
  try {
    const endpoint = `${PUBLIC_API}/xrpc/app.bsky.actor.getProfile?actor=${encodeURIComponent(actor)}`;
    const response = await fetch(endpoint);
    if (response.ok) {
      return await response.json();
    }
    return null;
  } catch {
    return null;
  }
}
7575+7676+// ============================================================================
7777+// PDS Discovery
7878+// ============================================================================
/**
 * Discover the user's PDS URL from their DID document.
 *
 * Supports `did:plc:` (resolved through plc.directory) and `did:web:`.
 * For did:web the method-specific identifier is decoded per the did:web
 * rules: colon-separated parts become path segments and each part is
 * percent-decoded, so an encoded port such as `localhost%3A3000` yields
 * `https://localhost:3000/.well-known/did.json`.
 *
 * Never throws: unsupported DID methods, non-OK responses, malformed
 * documents and network errors all resolve to null.
 *
 * @param {string} did
 * @returns {Promise<string|null>} PDS service URL
 */
export async function discoverPds(did) {
  try {
    let docUrl = null;
    if (did.startsWith('did:web:')) {
      const [host, ...pathSegments] = did
        .slice('did:web:'.length)
        .split(':')
        .map((part) => decodeURIComponent(part));
      if (!host) return null;
      // Without a path the document lives under /.well-known; with a path it
      // lives directly under that path.
      docUrl = pathSegments.length > 0
        ? `https://${host}/${pathSegments.join('/')}/did.json`
        : `https://${host}/.well-known/did.json`;
    } else if (did.startsWith('did:plc:')) {
      docUrl = `https://plc.directory/${did}`;
    }
    if (!docUrl) return null;

    const res = await fetch(docUrl);
    if (!res.ok) return null;
    const didDoc = await res.json();

    if (!didDoc || !Array.isArray(didDoc.service)) return null;

    // Accept the relative id, a fully-qualified `did:...#atproto_pds` id, or
    // the PDS service type.
    const pdsService = didDoc.service.find(
      (s) => Boolean(s) && (
        (typeof s.id === 'string' && s.id.endsWith('#atproto_pds')) ||
        s.type === 'AtprotoPersonalDataServer'
      )
    );
    const endpoint = pdsService ? pdsService.serviceEndpoint : null;
    // Callers concatenate paths onto the result, so only a non-empty string is usable.
    return typeof endpoint === 'string' && endpoint ? endpoint : null;
  } catch {
    return null;
  }
}
107107+108108+// ============================================================================
109109+// OAuth Authorization Server Discovery
110110+// ============================================================================
/**
 * Resolve a PDS to its OAuth authorization server metadata.
 *
 * Two lookups: the PDS's protected-resource document names the authorization
 * server (the first entry of `authorization_servers` is used), then that
 * server's own metadata document is fetched and returned as parsed JSON.
 *
 * @param {string} pdsUrl
 * @returns {Promise<Object>} Authorization server metadata
 * @throws {Error} If either document cannot be fetched or no authorization
 *   server is listed
 */
async function discoverAuthServer(pdsUrl) {
  // Fetch a JSON document, turning a non-OK status into a descriptive Error.
  const loadJson = async (url, describeFailure) => {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(describeFailure(response.status));
    }
    return response.json();
  };

  // Step 1: ask the PDS which authorization server protects it
  const protectedResource = await loadJson(
    `${pdsUrl}/.well-known/oauth-protected-resource`,
    (status) => `PDS does not support OAuth (${status})`
  );
  const issuerUrl = protectedResource.authorization_servers?.[0];
  if (!issuerUrl) {
    throw new Error('No authorization server found in PDS metadata');
  }

  // Step 2: load that server's metadata document
  return loadJson(
    `${issuerUrl}/.well-known/oauth-authorization-server`,
    (status) => `Failed to fetch auth server metadata (${status})`
  );
}
137137+138138+// ============================================================================
139139+// PKCE Helpers (Web Crypto)
140140+// ============================================================================
/**
 * Create a random PKCE code verifier.
 *
 * 32 random bytes are base64url-encoded, giving a 43-character string drawn
 * from the unreserved URI character set.
 *
 * @returns {string}
 */
function generateCodeVerifier() {
  // getRandomValues fills the array in place and returns it.
  const randomBytes = crypto.getRandomValues(new Uint8Array(32));
  return base64urlEncode(randomBytes);
}
/**
 * Derive the S256 PKCE code challenge for a verifier: the base64url-encoded
 * SHA-256 digest of the verifier's UTF-8 bytes.
 *
 * @param {string} verifier
 * @returns {Promise<string>}
 */
async function generateCodeChallenge(verifier) {
  const verifierBytes = new TextEncoder().encode(verifier);
  const digest = await crypto.subtle.digest('SHA-256', verifierBytes);
  const digestBytes = new Uint8Array(digest);
  return base64urlEncode(digestBytes);
}
162162+163163+// ============================================================================
164164+// DPoP Helpers (Web Crypto)
165165+// ============================================================================
/**
 * Create a fresh ECDSA P-256 keypair for signing DPoP proofs.
 *
 * The keys are generated as extractable so they can be exported for storage.
 *
 * @returns {Promise<CryptoKeyPair>}
 */
async function generateDpopKeyPair() {
  const algorithm = { name: 'ECDSA', namedCurve: 'P-256' };
  const extractable = true; // extractable for storage
  return crypto.subtle.generateKey(algorithm, extractable, ['sign', 'verify']);
}
/**
 * Serialize both halves of a CryptoKeyPair to JWK so the pair can be stored.
 *
 * @param {CryptoKeyPair} keyPair
 * @returns {Promise<{publicKey: Object, privateKey: Object}>}
 */
export async function exportKeyPair(keyPair) {
  const toJwk = (key) => crypto.subtle.exportKey('jwk', key);
  const [publicKey, privateKey] = await Promise.all([
    toJwk(keyPair.publicKey),
    toJwk(keyPair.privateKey),
  ]);
  return { publicKey, privateKey };
}
/**
 * Rebuild a CryptoKeyPair from its stored JWK form.
 *
 * The public key is imported for `verify` only and the private key for `sign`
 * only; both are imported as extractable ECDSA P-256 keys.
 *
 * @param {{publicKey: Object, privateKey: Object}} jwks
 * @returns {Promise<CryptoKeyPair>}
 */
export async function importKeyPair(jwks) {
  const algorithm = { name: 'ECDSA', namedCurve: 'P-256' };
  const fromJwk = (jwk, usage) =>
    crypto.subtle.importKey('jwk', jwk, algorithm, true, [usage]);

  const publicKey = await fromJwk(jwks.publicKey, 'verify');
  const privateKey = await fromJwk(jwks.privateKey, 'sign');
  return { publicKey, privateKey };
}
/**
 * Create a DPoP proof JWT (ES256) bound to one HTTP request.
 *
 * The `htu` claim is the target URI WITHOUT its query string and fragment, as
 * RFC 9449 requires; callers may pass the full request URL and it is trimmed
 * here.
 *
 * @param {CryptoKeyPair} keyPair
 * @param {string} method - HTTP method (GET, POST)
 * @param {string} url - Target URL (query/fragment are ignored for `htu`)
 * @param {string} [nonce] - Server-provided DPoP nonce
 * @param {string} [ath] - Access token hash (for resource requests)
 * @returns {Promise<string>} Signed JWT
 */
async function createDpopProof(keyPair, method, url, nonce, ath) {
  const publicJwk = await crypto.subtle.exportKey('jwk', keyPair.publicKey);
  // Defensive: make sure no private component can end up in the JWT header.
  const { d: _d, ...cleanPublicJwk } = publicJwk;

  const header = {
    typ: 'dpop+jwt',
    alg: 'ES256',
    jwk: cleanPublicJwk,
  };

  // Drop everything from the first '?' or '#' onwards.
  const htu = String(url).split(/[?#]/, 1)[0];

  // Optional claims are added only when a value was supplied.
  const payload = {
    jti: generateJti(),
    htm: method,
    htu,
    iat: Math.floor(Date.now() / 1000),
    ...(nonce ? { nonce } : {}),
    ...(ath ? { ath } : {}),
  };

  const encoder = new TextEncoder();
  const headerB64 = base64urlEncode(encoder.encode(JSON.stringify(header)));
  const payloadB64 = base64urlEncode(encoder.encode(JSON.stringify(payload)));
  const signingInput = `${headerB64}.${payloadB64}`;

  const signature = await crypto.subtle.sign(
    { name: 'ECDSA', hash: 'SHA-256' },
    keyPair.privateKey,
    encoder.encode(signingInput)
  );

  // ECDSA signature from WebCrypto is in IEEE P1363 format (r||s, 64 bytes for P-256).
  // JWT expects this format, so no conversion needed.
  const sigB64 = base64urlEncode(new Uint8Array(signature));
  return `${signingInput}.${sigB64}`;
}
/**
 * Hash an access token for the `ath` claim of a DPoP proof.
 *
 * @param {string} accessToken
 * @returns {Promise<string>} base64url-encoded SHA-256 hash
 */
async function computeAth(accessToken) {
  const tokenBytes = new TextEncoder().encode(accessToken);
  const digest = await crypto.subtle.digest('SHA-256', tokenBytes);
  return base64urlEncode(new Uint8Array(digest));
}
263263+264264+// ============================================================================
265265+// Base64url Encoding
266266+// ============================================================================
/**
 * Encode bytes as unpadded base64url: standard base64 with `+` mapped to `-`,
 * `/` mapped to `_`, and trailing `=` removed.
 *
 * @param {Uint8Array} data
 * @returns {string}
 */
function base64urlEncode(data) {
  // btoa works on a "binary string": one character per byte.
  let binary = '';
  for (const byte of data) {
    binary += String.fromCharCode(byte);
  }
  const standard = btoa(binary);
  const urlSafe = standard.replace(/[+/]/g, (ch) => (ch === '+' ? '-' : '_'));
  return urlSafe.replace(/=+$/, '');
}
/**
 * Create a random identifier for a DPoP proof's `jti` claim:
 * 16 random bytes rendered as 32 lowercase hex characters.
 *
 * @returns {string}
 */
function generateJti() {
  const randomBytes = crypto.getRandomValues(new Uint8Array(16));
  let hex = '';
  for (const byte of randomBytes) {
    hex += byte.toString(16).padStart(2, '0');
  }
  return hex;
}
283283+284284+// ============================================================================
285285+// OAuth Login Flow
286286+// ============================================================================
/**
 * @typedef {Object} OAuthSession
 * @property {string} did
 * @property {string} handle
 * @property {string} accessToken
 * @property {string} refreshToken
 * @property {string} tokenEndpoint
 * @property {string} pdsUrl
 * @property {{publicKey: Object, privateKey: Object}} dpopKeyPairJwk - JWK format for storage
 * @property {string} [sub] - Subject returned by the token endpoint (falls back to `did`)
 * @property {string} [clientId] - Exact client_id used at login, kept so later
 *   token requests can present the same client identity
 */

/**
 * Submit a Pushed Authorization Request and return its `request_uri`.
 * Retries once with the server-supplied DPoP nonce when the first attempt is
 * rejected with HTTP 400 and a `DPoP-Nonce` header.
 *
 * @param {string} parEndpoint
 * @param {URLSearchParams} parBody
 * @param {CryptoKeyPair} dpopKeyPair
 * @returns {Promise<string>} request_uri to reference in the authorization URL
 * @throws {Error} If the request is rejected or the response has no request_uri
 */
async function pushAuthorizationRequest(parEndpoint, parBody, dpopKeyPair) {
  const send = async (nonce) => {
    const proof = await createDpopProof(dpopKeyPair, 'POST', parEndpoint, nonce);
    return fetch(parEndpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded',
        'DPoP': proof,
      },
      body: parBody.toString(),
    });
  };

  let res = await send(undefined);
  if (!res.ok) {
    // Check for DPoP nonce requirement
    const dpopNonce = res.headers.get('DPoP-Nonce');
    if (dpopNonce && res.status === 400) {
      res = await send(dpopNonce);
    }
  }

  if (!res.ok) {
    const err = await res.json().catch(() => ({}));
    throw new Error(err.error_description || err.error || `PAR failed (${res.status})`);
  }

  const parData = await res.json();
  if (!parData || !parData.request_uri) {
    throw new Error('PAR response did not include a request_uri');
  }
  return parData.request_uri;
}

/**
 * Login via OAuth PKCE + DPoP using the loopback server.
 *
 * Flow: resolve handle → DID → PDS, discover the authorization server, start
 * the loopback listener, send the user to the authorization page (through PAR
 * when the server advertises it), wait for the redirect, validate it, and
 * exchange the code for DPoP-bound tokens.
 *
 * @param {string} handle
 * @returns {Promise<OAuthSession>}
 * @throws {Error} If the Peek API is unavailable, any discovery step fails,
 *   the callback fails validation (state / issuer), or the token response is
 *   for a different account than the one requested
 */
export async function loginWithOAuth(handle) {
  // Fail fast with a clear message instead of a TypeError after several
  // network round trips.
  if (!api || !api.oauth || !api.window) {
    throw new Error('OAuth login requires the Peek API (window.app), which is not available');
  }

  // 1. Resolve handle → DID → PDS
  const did = await resolveHandle(handle);
  if (!did) throw new Error(`Could not resolve handle: ${handle}`);

  const pdsUrl = await discoverPds(did);
  if (!pdsUrl) throw new Error(`Could not discover PDS for ${handle}`);

  // 2. Discover auth server
  const authMeta = await discoverAuthServer(pdsUrl);
  const authEndpoint = authMeta.authorization_endpoint;
  const tokenEndpoint = authMeta.token_endpoint;
  const pushedAuthEndpoint = authMeta.pushed_authorization_request_endpoint;

  if (!authEndpoint || !tokenEndpoint) {
    throw new Error('Authorization server missing required endpoints');
  }

  // 3. Start loopback server
  const loopback = await api.oauth.startLoopback();
  if (!loopback.success) throw new Error(loopback.error || 'Failed to start loopback server');
  const port = loopback.port;
  const redirectUri = `http://127.0.0.1:${port}/callback`;

  try {
    // 4. Generate PKCE
    const codeVerifier = generateCodeVerifier();
    const codeChallenge = await generateCodeChallenge(codeVerifier);

    // 5. Generate DPoP keypair
    const dpopKeyPair = await generateDpopKeyPair();

    // 6. Build client_id (loopback client metadata URL)
    const scope = 'atproto transition:generic';
    const clientId = `http://localhost?redirect_uri=${encodeURIComponent(redirectUri)}&scope=${encodeURIComponent(scope)}`;

    // 7. Generate state
    const stateArray = new Uint8Array(16);
    crypto.getRandomValues(stateArray);
    const state = base64urlEncode(stateArray);

    // 8. Build authorization URL or use PAR (both carry the same parameters)
    const authParams = {
      client_id: clientId,
      redirect_uri: redirectUri,
      response_type: 'code',
      scope,
      state,
      code_challenge: codeChallenge,
      code_challenge_method: 'S256',
      login_hint: handle,
    };

    let authUrl;
    if (pushedAuthEndpoint) {
      // Use Pushed Authorization Requests (PAR) — required by many AT Protocol auth servers
      const requestUri = await pushAuthorizationRequest(
        pushedAuthEndpoint,
        new URLSearchParams(authParams),
        dpopKeyPair
      );
      authUrl = `${authEndpoint}?client_id=${encodeURIComponent(clientId)}&request_uri=${encodeURIComponent(requestUri)}`;
    } else {
      // Direct authorization URL
      authUrl = `${authEndpoint}?${new URLSearchParams(authParams).toString()}`;
    }

    // 9. Open auth window
    api.window.open(authUrl, {
      width: 600,
      height: 700,
      role: 'modal',
      title: 'AT Protocol Authorization',
    });

    // 10. Wait for callback
    const callbackResult = await api.oauth.awaitCallback(port);
    if (!callbackResult.success) throw new Error(callbackResult.error || 'OAuth callback failed');
    const callbackParams = callbackResult.params;

    // 11. Verify state
    if (callbackParams.state !== state) {
      throw new Error('OAuth state mismatch — possible CSRF attack');
    }
    if (callbackParams.error) {
      throw new Error(callbackParams.error_description || callbackParams.error);
    }
    const code = callbackParams.code;
    if (!code) throw new Error('No authorization code received');

    // When both sides report an issuer they must agree; otherwise the code
    // may have been minted by a different authorization server (mix-up).
    const iss = callbackParams.iss;
    if (iss && authMeta.issuer && iss !== authMeta.issuer) {
      throw new Error('OAuth issuer mismatch — response did not come from the expected authorization server');
    }

    // 12. Exchange code for tokens (with DPoP proof); tokenExchange handles
    // the DPoP nonce retry itself.
    const tokenBody = new URLSearchParams({
      grant_type: 'authorization_code',
      code,
      redirect_uri: redirectUri,
      client_id: clientId,
      code_verifier: codeVerifier,
    });
    const tokenData = await tokenExchange(tokenEndpoint, tokenBody, dpopKeyPair);

    // The tokens must belong to the account the user asked to sign in as.
    if (tokenData.sub && tokenData.sub !== did) {
      throw new Error('OAuth token subject does not match the requested account');
    }

    // 13. Export DPoP keypair for storage
    const dpopKeyPairJwk = await exportKeyPair(dpopKeyPair);

    return {
      did,
      handle,
      accessToken: tokenData.access_token,
      refreshToken: tokenData.refresh_token,
      tokenEndpoint,
      pdsUrl,
      dpopKeyPairJwk,
      sub: tokenData.sub || did,
      clientId,
    };
  } catch (err) {
    // Cancel the loopback server if still pending
    // NOTE(review): this awaits the callback rather than calling an explicit
    // cancel; how long it blocks depends on the host's awaitCallback
    // implementation, which is not visible here — confirm.
    try { await api.oauth.awaitCallback(port); } catch {}
    throw err;
  }
}
/**
 * POST a grant (authorization code or refresh token) to the token endpoint
 * with a DPoP proof attached.
 *
 * If the first attempt is rejected and the response carries a `DPoP-Nonce`
 * header, the request is repeated once with a proof that includes that nonce.
 *
 * @param {string} tokenEndpoint
 * @param {URLSearchParams} body - Form-encoded grant parameters
 * @param {CryptoKeyPair} dpopKeyPair
 * @param {string} [nonce] - DPoP nonce to use on the first attempt, if known
 * @returns {Promise<Object>} Parsed token response
 * @throws {Error} If the (possibly retried) request is not successful
 */
async function tokenExchange(tokenEndpoint, body, dpopKeyPair, nonce) {
  const postGrant = async (proofNonce) => {
    const proof = await createDpopProof(dpopKeyPair, 'POST', tokenEndpoint, proofNonce);
    return fetch(tokenEndpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded',
        'DPoP': proof,
      },
      body: body.toString(),
    });
  };

  let response = await postGrant(nonce);

  // Handle DPoP nonce requirement (use_dpop_nonce error)
  if (!response.ok) {
    const serverNonce = response.headers.get('DPoP-Nonce');
    if (serverNonce) {
      response = await postGrant(serverNonce);
    }
  }

  if (response.ok) {
    return response.json();
  }

  // Error bodies are optional; fall back to the status code.
  const failure = await response.json().catch(() => ({}));
  throw new Error(failure.error_description || failure.error || `Token exchange failed (${response.status})`);
}
512512+513513+// ============================================================================
514514+// Token Refresh
515515+// ============================================================================
/**
 * Refresh an OAuth access token using the refresh token + DPoP.
 *
 * The refresh request must present the same client_id the tokens were issued
 * to. Sessions that recorded it (`session.clientId`) reuse that exact value;
 * sessions without one fall back to the port-less loopback client_id this
 * function has always sent.
 *
 * @param {OAuthSession} session
 * @returns {Promise<OAuthSession>} Updated session with new tokens
 * @throws {Error} If the session has no refresh token or the token endpoint
 *   rejects the request
 */
export async function refreshOAuthSession(session) {
  if (!session.refreshToken) {
    // URLSearchParams would otherwise send the literal string "undefined".
    throw new Error('Cannot refresh OAuth session: no refresh token');
  }

  const dpopKeyPair = await importKeyPair(session.dpopKeyPairJwk);

  const legacyClientId = `http://localhost?redirect_uri=${encodeURIComponent('http://127.0.0.1/callback')}&scope=${encodeURIComponent('atproto transition:generic')}`;

  const body = new URLSearchParams({
    grant_type: 'refresh_token',
    refresh_token: session.refreshToken,
    client_id: session.clientId || legacyClientId,
  });

  const tokenData = await tokenExchange(session.tokenEndpoint, body, dpopKeyPair);

  return {
    ...session,
    accessToken: tokenData.access_token,
    // Servers may or may not rotate the refresh token; keep the old one if not.
    refreshToken: tokenData.refresh_token || session.refreshToken,
  };
}
539539+540540+// ============================================================================
541541+// Authenticated XRPC requests (DPoP)
542542+// ============================================================================
// In-flight token refreshes keyed by session object, so that concurrent
// requests sharing a session trigger a single refresh instead of each
// redeeming the same refresh token.
const pendingSessionRefreshes = new WeakMap();

/**
 * Refresh `session` in place, sharing one in-flight refresh among concurrent
 * callers. `onSessionRefresh` is invoked once per refresh actually performed,
 * by the caller that started it.
 *
 * @param {OAuthSession} session
 * @param {function} [onSessionRefresh]
 * @returns {Promise<OAuthSession>} The refreshed session data
 */
function refreshSessionOnce(session, onSessionRefresh) {
  let pending = pendingSessionRefreshes.get(session);
  if (!pending) {
    pending = (async () => {
      try {
        const refreshed = await refreshOAuthSession(session);
        Object.assign(session, refreshed);
        if (onSessionRefresh) onSessionRefresh(refreshed);
        return refreshed;
      } finally {
        pendingSessionRefreshes.delete(session);
      }
    })();
    pendingSessionRefreshes.set(session, pending);
  }
  return pending;
}

/**
 * Make an authenticated GET request to the user's PDS using DPoP tokens.
 *
 * Handles the server's DPoP nonce challenge, and on a remaining 401 refreshes
 * the token once and retries. On refresh, `session` is updated in place and
 * `onSessionRefresh` is called with the new session data.
 *
 * @param {OAuthSession} session
 * @param {string} nsid - XRPC method
 * @param {Object} [params] - Query parameters (null/undefined values are skipped)
 * @param {function} [onSessionRefresh] - Called with updated session on token refresh
 * @returns {Promise<Object>} Response data
 * @throws {Error} If the final response is not successful
 */
export async function xrpcGet(session, nsid, params = {}, onSessionRefresh = null) {
  const qs = new URLSearchParams();
  for (const [k, v] of Object.entries(params)) {
    if (v !== undefined && v !== null) qs.set(k, String(v));
  }
  const qsStr = qs.toString();
  const url = `${session.pdsUrl}/xrpc/${nsid}${qsStr ? '?' + qsStr : ''}`;

  const dpopKeyPair = await importKeyPair(session.dpopKeyPairJwk);

  // Most recent DPoP nonce the server handed out; reused on later attempts so
  // the post-refresh request does not pay for another nonce challenge.
  let lastNonce;

  // One GET with the given token, plus a single retry when the server answers
  // 401 with a DPoP nonce we have not tried yet.
  const authorizedGet = async (accessToken) => {
    const ath = await computeAth(accessToken);
    const send = async (nonce) => {
      const proof = await createDpopProof(dpopKeyPair, 'GET', url, nonce, ath);
      return fetch(url, {
        headers: {
          'Authorization': `DPoP ${accessToken}`,
          'DPoP': proof,
        },
      });
    };

    const sentNonce = lastNonce;
    let response = await send(sentNonce);
    if (response.status === 401) {
      const serverNonce = response.headers.get('DPoP-Nonce');
      if (serverNonce) {
        lastNonce = serverNonce;
        if (serverNonce !== sentNonce) {
          response = await send(serverNonce);
        }
      }
    }
    return response;
  };

  const tokenUsed = session.accessToken;
  let res = await authorizedGet(tokenUsed);

  // Retry once on 401 (expired token)
  if (res.status === 401 && session.refreshToken) {
    try {
      // If another request already rotated the token while this one was in
      // flight, just retry with the current token instead of refreshing again.
      if (session.accessToken === tokenUsed) {
        await refreshSessionOnce(session, onSessionRefresh);
      }
      res = await authorizedGet(session.accessToken);
    } catch (err) {
      // Refresh failed: fall through and report the original 401, but leave a
      // trace of why the retry did not happen.
      console.warn(`XRPC ${nsid}: token refresh failed`, err);
    }
  }

  if (!res.ok) {
    const err = await res.json().catch(() => ({}));
    throw new Error(err.message || `XRPC ${nsid} failed (${res.status})`);
  }

  return res.json();
}
631631+632632+// ============================================================================
633633+// Repo / Lexicon helpers
634634+// ============================================================================
/**
 * List the collection NSIDs present in the session user's repo, via
 * `com.atproto.repo.describeRepo`.
 *
 * @param {OAuthSession} session
 * @param {function} [onSessionRefresh]
 * @returns {Promise<string[]>} Array of collection NSIDs (empty if none reported)
 */
export async function getCollections(session, onSessionRefresh) {
  const query = { repo: session.did };
  const description = await xrpcGet(session, 'com.atproto.repo.describeRepo', query, onSessionRefresh);
  return description.collections || [];
}
/**
 * Count records in a collection using a single `listRecords` page.
 *
 * Only the first page (limit 100) is requested, so `count` is capped at the
 * size of that page; `hasMore` is true when the response includes a cursor.
 *
 * @param {OAuthSession} session
 * @param {string} collection - Collection NSID
 * @param {function} [onSessionRefresh]
 * @returns {Promise<{count: number, hasMore: boolean}>}
 */
export async function getCollectionCount(session, collection, onSessionRefresh) {
  const query = { repo: session.did, collection, limit: 100 };
  const page = await xrpcGet(session, 'com.atproto.repo.listRecords', query, onSessionRefresh);

  const firstPage = page.records || [];
  const hasMore = Boolean(page.cursor);
  return { count: firstPage.length, hasMore };
}
/**
 * Count every listed collection in parallel.
 *
 * Collections whose count request fails are left out of the result; the rest
 * are returned sorted by count, largest first.
 *
 * @param {OAuthSession} session
 * @param {string[]} collections
 * @param {function} [onSessionRefresh]
 * @returns {Promise<Array<{nsid: string, count: number, hasMore: boolean}>>}
 */
export async function getAllCollectionCounts(session, collections, onSessionRefresh) {
  const countOne = async (nsid) => {
    const { count, hasMore } = await getCollectionCount(session, nsid, onSessionRefresh);
    return { nsid, count, hasMore };
  };

  const outcomes = await Promise.allSettled(collections.map((nsid) => countOne(nsid)));

  const counted = [];
  for (const outcome of outcomes) {
    if (outcome.status === 'fulfilled') {
      counted.push(outcome.value);
    }
  }
  counted.sort((a, b) => b.count - a.count);
  return counted;
}
···18141814 }
18151815};
// OAuth loopback server — available to all pages including extensions.
// Both calls are forwarded to the main process over IPC and resolve with
// whatever the corresponding handler returns.
api.oauth = {
  startLoopback(options) {
    return ipcRenderer.invoke('oauth-start-loopback', options);
  },
  awaitCallback(port) {
    return ipcRenderer.invoke('oauth-await-callback', { port });
  },
};
18221822+18171823// Escape handler - responds to backend's escape query
18181824// The backend intercepts ESC on keyDown via before-input-event and calls e.preventDefault(),
18191825// so the DOM keydown event never reaches the page. This means peek-dialog's own keydown