Ionosphere.tv
3
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: Lens 3 — layers.pub records to ionosphere document facets (round-trip verified)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+196 -2
+71 -1
apps/ionosphere-appview/src/__tests__/layers-pub.test.ts
import { describe, it, expect } from 'vitest';
import {
  transcriptToLayersPub,
  nlpToAnnotationLayers,
  layersPubToDocument,
} from '../../../../formats/tv.ionosphere/ts/layers-pub.js';

// The Lens 1 and Lens 2 suites precede this one; they are only partially
// visible in this view and are not reproduced here.

/**
 * Lens 3 round-trip check.
 *
 * Builds the same talk document two ways and verifies they agree:
 *   Path A: encode() → decodeToDocumentWithStructure()
 *   Path B: transcriptToLayersPub() + nlpToAnnotationLayers() → layersPubToDocument()
 *
 * Only the document text, the facet count, each facet's byte range and each
 * facet's first feature `$type` are compared; feature payload fields
 * (timestamps, labels, …) are not checked here.
 */
describe('Lens 3: round-trip correctness', () => {
  it('layers.pub → document matches decodeToDocumentWithStructure output', async (ctx) => {
    const fs = await import('node:fs');
    const path = await import('node:path');

    const transcriptsDir = path.resolve(import.meta.dirname, '../../../data/transcripts');
    const nlpDir = path.resolve(import.meta.dirname, '../../../../pipeline/data/nlp');

    const rkey = 'ats26-keynote';
    const transcriptPath = path.join(transcriptsDir, `${rkey}.json`);
    const nlpPath = path.join(nlpDir, `${rkey}.json`);

    if (!fs.existsSync(transcriptPath) || !fs.existsSync(nlpPath)) {
      // Report the case as skipped instead of letting it pass without
      // having verified anything.
      console.log('Skipping: fixture data not available');
      ctx.skip();
      return;
    }

    const { encode, decodeToDocumentWithStructure } = await import(
      '../../../../formats/tv.ionosphere/ts/transcript-encoding.js'
    );

    const transcriptData = JSON.parse(fs.readFileSync(transcriptPath, 'utf-8'));
    const nlpData = JSON.parse(fs.readFileSync(nlpPath, 'utf-8'));
    const compact = encode(transcriptData);

    // Path A: existing direct path
    const directDoc = decodeToDocumentWithStructure(compact, nlpData);

    // Path B: through lenses
    const did = 'did:plc:test';
    const transcriptRecord = {
      $type: 'tv.ionosphere.transcript' as const,
      text: compact.text,
      startMs: compact.startMs,
      timings: compact.timings,
      talkUri: `at://${did}/tv.ionosphere.talk/${rkey}`,
    };
    const { expression, segmentation } = await transcriptToLayersPub(transcriptRecord, did, rkey);
    const expressionUri = `at://${did}/pub.layers.expression.expression/${rkey}-expression`;
    const layers = await nlpToAnnotationLayers(nlpData, did, rkey, expressionUri);
    const lensDoc = await layersPubToDocument(expression, segmentation, layers);

    // Compare text
    expect(lensDoc.text).toBe(directDoc.text);

    // Compare facet counts
    expect(lensDoc.facets.length).toBe(directDoc.facets.length);

    // Sort both facet arrays by byteStart, then $type, then byteEnd. The final
    // byteEnd key makes the order independent of the order in which each path
    // emitted facets that share a start offset and a type.
    const sortFacets = (facets: typeof directDoc.facets) =>
      [...facets].sort((a, b) => {
        const startDelta = a.index.byteStart - b.index.byteStart;
        if (startDelta !== 0) return startDelta;
        const typeA = a.features[0]?.$type ?? '';
        const typeB = b.features[0]?.$type ?? '';
        const typeDelta = typeA.localeCompare(typeB);
        if (typeDelta !== 0) return typeDelta;
        return a.index.byteEnd - b.index.byteEnd;
      });

    const sortedDirect = sortFacets(directDoc.facets);
    const sortedLens = sortFacets(lensDoc.facets);

    // Check each facet matches
    for (let i = 0; i < sortedDirect.length; i++) {
      expect(sortedLens[i].index).toEqual(sortedDirect[i].index);
      expect(sortedLens[i].features[0].$type).toBe(sortedDirect[i].features[0].$type);
    }
  });
});
+6
formats/tv.ionosphere/lenses/layers-to-document.lens.json
··· 1 + { 2 + "source": ["pub.layers.expression.expression", "pub.layers.segmentation.segmentation", "pub.layers.annotation.annotationLayer"], 3 + "target": "tv.ionosphere.document", 4 + "version": 1, 5 + "description": "Layers.pub records to ionosphere RelationalText document with facets (materialized view builder)" 6 + }
+119 -1
formats/tv.ionosphere/ts/layers-pub.ts
/**
 * Lens transforms between tv.ionosphere records and pub.layers records.
 *
 * Lens 1: transcriptToLayersPub
 *   tv.ionosphere.transcript → pub.layers.expression.expression
 *                            + pub.layers.segmentation.segmentation
 *
 * Lens 2: nlpToAnnotationLayers
 *   NLP annotations → 4 pub.layers.annotation.annotationLayer records
 *
 * Lens 3: layersPubToDocument (reverse)
 *   pub.layers records → tv.ionosphere document with facets
 *
 * The timings replay algorithm matches decodeToDocument() in
 * transcript-encoding.ts — uses TextEncoder for correct UTF-8 byte offsets,
 * and indexOf with searchFrom for word position tracking.
 */

import type { Document, DocumentFacet } from './transcript-encoding.js';

// TranscriptRecord and the Lens 1 / Lens 2 transforms are defined between the
// header above and Lens 3 below; they are only partially visible in this view
// and are not reproduced here.

/** Multiplier applied to token times (milliseconds) to get facet times (nanoseconds). */
const NS_PER_MS = 1_000_000;

/** A UTF-8 byte range within the expression text. */
interface ByteSpan {
  byteStart: number;
  byteEnd: number;
}

/** Copies the byte offsets of a pub.layers text span into a fresh facet index. */
function toFacetIndex(span: ByteSpan): ByteSpan {
  return { byteStart: span.byteStart, byteEnd: span.byteEnd };
}

/**
 * Lens 3 (reverse): Transform pub.layers records back into an ionosphere
 * RelationalText document with facets.
 *
 * This is the materialized view builder — used by the appview indexer to
 * rebuild the talk document when layers.pub records arrive via Jetstream.
 *
 * Round-trip property:
 *   transcriptToLayersPub + nlpToAnnotationLayers + layersPubToDocument
 *   should produce the SAME document as decodeToDocumentWithStructure.
 *
 * Facets are emitted in a fixed order: timestamps, sentences, paragraphs,
 * entities / concept refs, topic breaks. Within each group the input order is
 * preserved.
 *
 * @param expression - Expression record; only its `text` is read.
 * @param segmentation - Segmentation record; the tokens of its first
 *   tokenization become timestamp facets. A segmentation without any
 *   tokenization contributes no timestamp facets.
 * @param annotationLayers - The sentence, paragraph, entity and topic layers.
 * @returns The document text together with all rebuilt facets.
 */
export async function layersPubToDocument(
  expression: ExpressionRecord,
  segmentation: SegmentationRecord,
  annotationLayers: AnnotationLayersResult,
): Promise<Document> {
  const facets: DocumentFacet[] = [];

  // 1. Timestamp facets from segmentation tokens. Guarded so that a
  //    segmentation with no tokenization yields zero timestamp facets instead
  //    of throwing on `tokenizations[0].tokens`.
  const tokens = segmentation.tokenizations?.[0]?.tokens ?? [];
  for (const token of tokens) {
    facets.push({
      index: toFacetIndex(token.textSpan),
      features: [
        {
          $type: 'tv.ionosphere.facet#timestamp',
          startTime: token.temporalSpan.start * NS_PER_MS, // ms → ns
          endTime: token.temporalSpan.ending * NS_PER_MS,
        },
      ],
    });
  }

  // 2. Sentence facets
  for (const ann of annotationLayers.sentences.annotations) {
    facets.push({
      index: toFacetIndex(ann.anchor.textSpan),
      features: [{ $type: 'tv.ionosphere.facet#sentence' }],
    });
  }

  // 3. Paragraph facets
  for (const ann of annotationLayers.paragraphs.annotations) {
    facets.push({
      index: toFacetIndex(ann.anchor.textSpan),
      features: [{ $type: 'tv.ionosphere.facet#paragraph' }],
    });
  }

  // 4. Entity facets — route based on features entries
  //    conceptUri → #concept-ref, else → #entity
  //    (speakerDid routing not needed — zero instances in actual data)
  for (const ann of annotationLayers.entities.annotations) {
    const entries = ann.features?.entries ?? [];
    const index = toFacetIndex(ann.anchor.textSpan);
    const conceptUriEntry = entries.find((e) => e.key === 'conceptUri');

    if (conceptUriEntry) {
      facets.push({
        index,
        features: [
          {
            $type: 'tv.ionosphere.facet#concept-ref',
            conceptUri: conceptUriEntry.value,
            conceptName: ann.label,
          },
        ],
      });
      continue;
    }

    // Plain entity: nerType stays undefined when the layer carries no such entry.
    const nerTypeEntry = entries.find((e) => e.key === 'nerType');
    facets.push({
      index,
      features: [
        {
          $type: 'tv.ionosphere.facet#entity',
          label: ann.label,
          nerType: nerTypeEntry?.value,
        },
      ],
    });
  }

  // 5. Topic break facets
  for (const ann of annotationLayers.topics.annotations) {
    facets.push({
      index: toFacetIndex(ann.anchor.textSpan),
      features: [{ $type: 'tv.ionosphere.facet#topic-break' }],
    });
  }

  return { text: expression.text, facets };
}