feat: publish layers.pub records in Stage 6 of publish pipeline

+52

apps/ionosphere-appview/src/__tests__/layers-pub.test.ts

··· 122 122 expect(topics.annotations[0].anchor.textSpan).toEqual({ byteStart: 12, byteEnd: 12 }); 123 123 }); 124 124 }); 125 + 126 + describe('Stage 6 integration: full record production', () => { 127 + it('produces 6 records for a talk with transcript + NLP data', async () => { 128 + const fs = await import('node:fs'); 129 + const path = await import('node:path'); 130 + 131 + const transcriptsDir = path.resolve(import.meta.dirname, '../../../data/transcripts'); 132 + const nlpDir = path.resolve(import.meta.dirname, '../../../../pipeline/data/nlp'); 133 + 134 + const rkey = 'ats26-keynote'; 135 + const transcriptPath = path.join(transcriptsDir, `${rkey}.json`); 136 + const nlpPath = path.join(nlpDir, `${rkey}.json`); 137 + 138 + // Skip if fixtures not available 139 + if (!fs.existsSync(transcriptPath) || !fs.existsSync(nlpPath)) { 140 + console.log('Skipping: fixture data not available'); 141 + return; 142 + } 143 + 144 + const { encode } = await import('../../../../formats/tv.ionosphere/ts/transcript-encoding.js'); 145 + 146 + const transcriptData = JSON.parse(fs.readFileSync(transcriptPath, 'utf-8')); 147 + const nlpData = JSON.parse(fs.readFileSync(nlpPath, 'utf-8')); 148 + const compact = encode(transcriptData); 149 + 150 + const did = 'did:plc:test'; 151 + const transcriptRecord = { 152 + $type: 'tv.ionosphere.transcript' as const, 153 + text: compact.text, 154 + startMs: compact.startMs, 155 + timings: compact.timings, 156 + talkUri: `at://${did}/tv.ionosphere.talk/${rkey}`, 157 + }; 158 + 159 + const { expression, segmentation } = await transcriptToLayersPub(transcriptRecord, did, rkey); 160 + const expressionUri = `at://${did}/pub.layers.expression.expression/${rkey}-expression`; 161 + const layers = await nlpToAnnotationLayers(nlpData, did, rkey, expressionUri); 162 + 163 + // Verify all 6 records have correct $type 164 + expect(expression.$type).toBe('pub.layers.expression.expression'); 165 + expect(segmentation.$type).toBe('pub.layers.segmentation.segmentation'); 166 + expect(layers.sentences.$type).toBe('pub.layers.annotation.annotationLayer'); 167 + expect(layers.paragraphs.$type).toBe('pub.layers.annotation.annotationLayer'); 168 + expect(layers.entities.$type).toBe('pub.layers.annotation.annotationLayer'); 169 + expect(layers.topics.$type).toBe('pub.layers.annotation.annotationLayer'); 170 + 171 + // Verify real data produces non-trivial results 172 + expect(segmentation.tokenizations[0].tokens.length).toBeGreaterThan(100); 173 + expect(layers.sentences.annotations.length).toBeGreaterThan(10); 174 + expect(layers.entities.annotations.length).toBeGreaterThan(10); 175 + }); 176 + });

+41 -1

apps/ionosphere-appview/src/publish.ts

··· 12 12 import { readFileSync, existsSync } from "node:fs"; 13 13 import path from "node:path"; 14 14 import { encode, decodeToDocumentWithStructure, type NlpAnnotations } from "@ionosphere/format/transcript-encoding"; 15 + import { transcriptToLayersPub, nlpToAnnotationLayers } from "@ionosphere/format/layers-pub"; 15 16 16 17 const PDS_URL = process.env.PDS_URL ?? "http://localhost:2690"; 17 18 const BOT_HANDLE = process.env.BOT_HANDLE ?? "ionosphere.test"; ··· 26 27 // 0. Publish lens records 27 28 console.log("Publishing lens records..."); 28 29 const lensDir = path.resolve(import.meta.dirname, "../../../formats/tv.ionosphere/lenses"); 29 - for (const file of ["schedule-to-talk.lens.json", "vod-to-talk.lens.json", "openai-whisper-to-transcript.lens.json", "transcript-to-document.lens.json"]) { 30 + for (const file of ["schedule-to-talk.lens.json", "vod-to-talk.lens.json", "openai-whisper-to-transcript.lens.json", "transcript-to-document.lens.json", "transcript-to-expression.lens.json", "nlp-to-annotation-layers.lens.json", "layers-to-document.lens.json"]) { 30 31 const lensPath = path.join(lensDir, file); 31 32 if (!existsSync(lensPath)) continue; 32 33 const spec = JSON.parse(readFileSync(lensPath, "utf-8")); ··· 167 168 transcriptCount++; 168 169 } 169 170 console.log(`\nPublished ${transcriptCount} transcripts.`); 171 + 172 + // 5. Publish layers.pub records 173 + console.log("\n=== Stage 6: layers.pub records ==="); 174 + let layersCount = 0; 175 + 176 + for (const talk of talks) { 177 + const transcriptPath = path.join(transcriptsDir, `${talk.rkey}.json`); 178 + const nlpPath = path.join(nlpDir, `${talk.rkey}.json`); 179 + if (!existsSync(transcriptPath) || !existsSync(nlpPath)) continue; 180 + 181 + const transcriptData = JSON.parse(readFileSync(transcriptPath, "utf-8")); 182 + const nlpData = JSON.parse(readFileSync(nlpPath, "utf-8")); 183 + const compact = encode(transcriptData); 184 + 185 + const transcriptRecord = { 186 + $type: "tv.ionosphere.transcript" as const, 187 + text: compact.text, 188 + startMs: compact.startMs, 189 + timings: compact.timings, 190 + talkUri: `at://${did}/tv.ionosphere.talk/${talk.rkey}`, 191 + }; 192 + 193 + const { expression, segmentation } = await transcriptToLayersPub(transcriptRecord, did, talk.rkey); 194 + const expressionUri = `at://${did}/pub.layers.expression.expression/${talk.rkey}-expression`; 195 + const layers = await nlpToAnnotationLayers(nlpData, did, talk.rkey, expressionUri); 196 + 197 + await Promise.all([ 198 + pds.putRecord("pub.layers.expression.expression", `${talk.rkey}-expression`, expression), 199 + pds.putRecord("pub.layers.segmentation.segmentation", `${talk.rkey}-segmentation`, segmentation), 200 + pds.putRecord("pub.layers.annotation.annotationLayer", `${talk.rkey}-sentences`, layers.sentences), 201 + pds.putRecord("pub.layers.annotation.annotationLayer", `${talk.rkey}-paragraphs`, layers.paragraphs), 202 + pds.putRecord("pub.layers.annotation.annotationLayer", `${talk.rkey}-entities`, layers.entities), 203 + pds.putRecord("pub.layers.annotation.annotationLayer", `${talk.rkey}-topics`, layers.topics), 204 + ]); 205 + 206 + console.log(` layers.pub: ${talk.rkey} (6 records)`); 207 + layersCount++; 208 + } 209 + console.log(`Published layers.pub records for ${layersCount} talks.`); 170 210 171 211 console.log(`\nAll records published to ${PDS_URL}`); 172 212 console.log(`DID: ${did}`);

+2 -1

formats/tv.ionosphere/package.json

··· 8 8 "./assemble": "./ts/assemble.ts", 9 9 "./lenses": "./ts/lenses.ts", 10 10 "./panproto": "./ts/panproto.ts", 11 - "./transcript-encoding": "./ts/transcript-encoding.ts" 11 + "./transcript-encoding": "./ts/transcript-encoding.ts", 12 + "./layers-pub": "./ts/layers-pub.ts" 12 13 }, 13 14 "dependencies": { 14 15 "@msgpack/msgpack": "^3.1.3",

Configure Feed

Configure Feed