feat: Lens 2 — NLP annotations to 4 annotation layer records

+68 -1

apps/ionosphere-appview/src/__tests__/layers-pub.test.ts

// ··· Diff context: unchanged lines of this test file are elided in this view. ···
import { describe, it, expect } from 'vitest';
import { transcriptToLayersPub, nlpToAnnotationLayers } from '../../../../formats/tv.ionosphere/ts/layers-pub.js';

// ··· The pre-existing suite describe('Lens 1: transcript → expression + segmentation', ...)
//     is only partially visible here (its body is elided); the last visible assertion is
//     expect(tok.tokens[2].temporalSpan.ending).toBe(1750). It is not reproduced below. ···

/**
 * Lens 2 suite: feeds one small NLP annotation fixture through
 * `nlpToAnnotationLayers` and checks the shape of each returned layer.
 */
describe('Lens 2: NLP annotations → annotation layers', () => {
  // Identifiers handed to the lens on every call.
  const did = 'did:plc:test';
  const talkRkey = 'test-talk';
  const expressionUri = 'at://did:plc:test/pub.layers.expression.expression/test-talk-expression';

  // Fixture: two sentences, one paragraph, two entities (one carrying a
  // conceptUri) and a single topic break.
  const fixture = {
    talkRkey: 'test-talk',
    sentences: [
      { byteStart: 0, byteEnd: 11 },
      { byteStart: 12, byteEnd: 19 },
    ],
    paragraphs: [{ byteStart: 0, byteEnd: 19 }],
    entities: [
      { byteStart: 0, byteEnd: 5, label: 'Hello', nerType: 'MISC' },
      {
        byteStart: 12,
        byteEnd: 15,
        label: 'foo',
        nerType: 'ORG',
        conceptUri: 'at://did:plc:test/tv.ionosphere.concept/foo',
      },
    ],
    topicBreaks: [{ byteStart: 12 }],
    metadata: { tool: 'spacy/en_core_web_sm' },
  };

  // Every test runs the lens afresh against the same fixture.
  const runLens = () => nlpToAnnotationLayers(fixture, did, talkRkey, expressionUri);

  it('produces 4 annotation layer records', async () => {
    const result = await runLens();
    const layerNames = Object.keys(result);
    expect(layerNames).toEqual(['sentences', 'paragraphs', 'entities', 'topics']);
  });

  it('sentences layer has correct structure', async () => {
    const layer = (await runLens()).sentences;

    // Record envelope
    expect(layer.$type).toBe('pub.layers.annotation.annotationLayer');
    expect(layer.expression).toBe(expressionUri);
    expect(layer.kind).toBe('span');
    expect(layer.subkind).toBe('sentence-boundary');
    expect(layer.sourceMethod).toBe('automatic');
    expect(layer.metadata.tool).toBe('ionosphere-nlp-pipeline');

    // Annotations mirror the fixture's sentence spans
    expect(layer.annotations).toHaveLength(2);
    const [first] = layer.annotations;
    expect(first.anchor.textSpan).toEqual({ byteStart: 0, byteEnd: 11 });
  });

  it('entities layer wraps features in featureMap', async () => {
    const layer = (await runLens()).entities;
    expect(layer.annotations).toHaveLength(2);

    const [plainEntity, conceptEntity] = layer.annotations;

    // Plain entity: nerType is always present
    expect(plainEntity.label).toBe('Hello');
    expect(plainEntity.features.entries).toContainEqual({ key: 'nerType', value: 'MISC' });

    // Entity with conceptUri: both extra keys are forwarded to features
    const conceptEntries = conceptEntity.features.entries;
    expect(conceptEntries).toContainEqual({
      key: 'conceptUri',
      value: 'at://did:plc:test/tv.ionosphere.concept/foo',
    });
    expect(conceptEntries).toContainEqual({ key: 'nerType', value: 'ORG' });
  });

  it('topics layer has correct subkind and uses zero-width spans', async () => {
    const layer = (await runLens()).topics;
    expect(layer.subkind).toBe('topic-segment');
    expect(layer.annotations).toHaveLength(1);

    const onlyBreak = layer.annotations[0];
    expect(onlyBreak.anchor.textSpan).toEqual({ byteStart: 12, byteEnd: 12 });
  });
});

+12

formats/tv.ionosphere/lenses/nlp-to-annotation-layers.lens.json

··· 1 + { 2 + "source": "tv.ionosphere.nlpAnnotations", 3 + "target": ["pub.layers.annotation.annotationLayer"], 4 + "version": 1, 5 + "description": "NLP annotations to 4 annotation layer records (sentences, paragraphs, entities, topics)", 6 + "fanOut": { 7 + "sentences": { "subkind": "sentence-boundary", "from": "sentences" }, 8 + "paragraphs": { "subkind": "paragraph-boundary", "from": "paragraphs" }, 9 + "entities": { "subkind": "ner", "from": "entities" }, 10 + "topics": { "subkind": "topic-segment", "from": "topicBreaks" } 11 + } 12 + }

+136

formats/tv.ionosphere/ts/layers-pub.ts

// ··· Diff context elided in this view. The unchanged lines just before this addition
//     close the previous function in the file: `return { expression, segmentation }; }`. ···

/**
 * Input to Lens 2: NLP annotations for one talk.
 *
 * NOTE(review): byteStart/byteEnd are presumably byte offsets into the
 * expression text — confirm against the producer of these records.
 */
export interface NlpAnnotations {
  talkRkey: string;
  sentences: Array<{ byteStart: number; byteEnd: number }>;
  paragraphs: Array<{ byteStart: number; byteEnd: number }>;
  /** Any key other than byteStart/byteEnd/label is forwarded as a feature entry. */
  entities: Array<{
    byteStart: number;
    byteEnd: number;
    label: string;
    [key: string]: unknown;
  }>;
  /** Topic breaks carry only a start offset; they become zero-width spans. */
  topicBreaks: Array<{ byteStart: number }>;
  metadata: { tool: string; [key: string]: unknown };
}

/** One annotation inside a layer: a text span, a label and optional key/value features. */
export interface Annotation {
  anchor: { textSpan: { byteStart: number; byteEnd: number } };
  label: string;
  features?: { entries: Array<{ key: string; value: unknown }> };
}

/** A single `pub.layers.annotation.annotationLayer` record as built by Lens 2. */
export interface AnnotationLayerRecord {
  $type: 'pub.layers.annotation.annotationLayer';
  expression: string;
  kind: string;
  subkind: string;
  sourceMethod: string;
  metadata: { tool: string; timestamp: string };
  annotations: Annotation[];
  createdAt: string;
}

/** The four layers produced by {@link nlpToAnnotationLayers}, keyed by layer name. */
export interface AnnotationLayersResult {
  sentences: AnnotationLayerRecord;
  paragraphs: AnnotationLayerRecord;
  entities: AnnotationLayerRecord;
  topics: AnnotationLayerRecord;
}

/** Entity fields represented by the annotation's anchor/label, hence never forwarded as features. */
const ENTITY_STRUCTURAL_KEYS: ReadonlySet<string> = new Set(['byteStart', 'byteEnd', 'label']);

/** Builds a feature-less annotation covering `[byteStart, byteEnd]` with the given label. */
function toSpanAnnotation(byteStart: number, byteEnd: number, label: string): Annotation {
  return { anchor: { textSpan: { byteStart, byteEnd } }, label };
}

/** Collects an entity's extra fields (everything but the structural keys) as feature entries. */
function toFeatureEntries(
  entity: NlpAnnotations['entities'][number],
): Array<{ key: string; value: unknown }> {
  const entries: Array<{ key: string; value: unknown }> = [];
  for (const [key, value] of Object.entries(entity)) {
    if (ENTITY_STRUCTURAL_KEYS.has(key)) continue;
    // `undefined` has no JSON representation: JSON.stringify would drop the
    // `value` property and leave a dangling `{ key }` entry, so skip the key.
    if (value === undefined) continue;
    entries.push({ key, value });
  }
  return entries;
}

/**
 * Lens 2: Transform NLP annotations into 4 pub.layers annotation layer records.
 *
 * Produces, in this key order:
 * - sentences layer (sentence-boundary spans, label `sentence`)
 * - paragraphs layer (paragraph-boundary spans, label `paragraph`)
 * - entities layer (NER spans; extra entity fields wrapped in `features.entries`)
 * - topics layer (topic-segment zero-width spans, label `topic-break`)
 *
 * All four records share one timestamp, used for both `createdAt` and
 * `metadata.timestamp`, and a fixed `metadata.tool` of
 * `ionosphere-nlp-pipeline`; `nlpAnnotations.metadata` is not copied over.
 *
 * @param nlpAnnotations - Source annotations; the input is not mutated.
 * @param did - Not read by this function; kept for signature compatibility.
 * @param talkRkey - Not read by this function; kept for signature compatibility.
 * @param expressionUri - Written to the `expression` field of every layer.
 * @returns The four layer records keyed by layer name.
 */
export async function nlpToAnnotationLayers(
  nlpAnnotations: NlpAnnotations,
  did: string,
  talkRkey: string,
  expressionUri: string,
): Promise<AnnotationLayersResult> {
  const now = new Date().toISOString();

  // Shared record envelope; each layer gets its own metadata object so
  // callers can modify one record without affecting the others.
  const buildLayer = (subkind: string, annotations: Annotation[]): AnnotationLayerRecord => ({
    $type: 'pub.layers.annotation.annotationLayer',
    expression: expressionUri,
    kind: 'span',
    subkind,
    sourceMethod: 'automatic',
    metadata: { tool: 'ionosphere-nlp-pipeline', timestamp: now },
    annotations,
    createdAt: now,
  });

  const sentences = buildLayer(
    'sentence-boundary',
    nlpAnnotations.sentences.map((s) => toSpanAnnotation(s.byteStart, s.byteEnd, 'sentence')),
  );

  const paragraphs = buildLayer(
    'paragraph-boundary',
    nlpAnnotations.paragraphs.map((p) => toSpanAnnotation(p.byteStart, p.byteEnd, 'paragraph')),
  );

  // Entities always carry a `features` object, even when there is nothing to forward.
  const entities = buildLayer(
    'ner',
    nlpAnnotations.entities.map((e) => ({
      ...toSpanAnnotation(e.byteStart, e.byteEnd, e.label),
      features: { entries: toFeatureEntries(e) },
    })),
  );

  // Topic breaks are points, encoded as zero-width spans (byteEnd === byteStart).
  const topics = buildLayer(
    'topic-segment',
    nlpAnnotations.topicBreaks.map((t) => toSpanAnnotation(t.byteStart, t.byteStart, 'topic-break')),
  );

  return { sentences, paragraphs, entities, topics };
}

Configure Feed

Configure Feed