···11+{
22+ "lexicon": 1,
33+ "$type": "com.atproto.lexicon.schema",
44+ "id": "pub.layers.annotation.annotationLayer",
55+ "revision": 1,
66+ "description": "An annotation layer attaching typed annotations to a pub.layers.expression.expression record. Covers token-tag, span, relation, tree, and other layer kinds. Each annotation has a label and may carry an anchor, a confidence score, and features.",
77+ "defs": {
88+ "main": {
99+ "type": "record",
1010+ "key": "any",
1111+ "record": {
1212+ "type": "object",
1313+ "required": ["expression", "kind", "subkind", "sourceMethod", "annotations"],
1414+ "properties": {
1515+ "expression": {
1616+ "type": "string",
1717+ "format": "at-uri",
1818+ "description": "AT URI of the pub.layers.expression.expression this layer annotates."
1919+ },
2020+ "kindUri": {
2121+ "type": "string",
2222+ "format": "at-uri",
2323+ "description": "AT URI of a record formally defining the annotation layer kind."
2424+ },
2525+ "kind": {
2626+ "type": "string",
2727+ "description": "Layer kind: token-tag, span, relation, tree, etc."
2828+ },
2929+ "subkindUri": {
3030+ "type": "string",
3131+ "format": "at-uri",
3232+ "description": "AT URI of a record formally defining the annotation subkind (e.g. a specific tag set)."
3333+ },
3434+ "subkind": {
3535+ "type": "string",
3636+ "description": "More specific categorisation within the layer kind (e.g. POS, NER, dependency)."
3737+ },
3838+ "sourceMethodUri": {
3939+ "type": "string",
4040+ "format": "at-uri",
4141+ "description": "AT URI of a record formally defining the source method."
4242+ },
4343+ "sourceMethod": {
4444+ "type": "string",
4545+ "description": "How annotations were produced: manual-native, automatic, converted, etc."
4646+ },
4747+ "annotations": {
4848+ "type": "array",
4949+ "items": {
5050+ "type": "ref",
5151+ "ref": "pub.layers.annotation.annotationLayer#annotation"
5252+ },
5353+ "description": "The individual annotations that make up this layer."
5454+ },
5555+ "tokenizationId": {
5656+ "type": "ref",
5757+ "ref": "pub.layers.defs#uuid",
5858+ "description": "UUID of the tokenization this layer is aligned to (for token-tag layers)."
5959+ },
6060+ "rank": {
6161+ "type": "integer",
6262+ "description": "Ordering rank when multiple layers of the same kind exist on the same expression."
6363+ },
6464+ "metadata": {
6565+ "type": "ref",
6666+ "ref": "pub.layers.defs#annotationMetadata",
6767+ "description": "Provenance metadata for this layer."
6868+ },
6969+ "createdAt": {
7070+ "type": "string",
7171+ "format": "datetime",
7272+ "description": "ISO 8601 datetime when this record was created."
7373+ }
7474+ }
7575+ }
7676+ },
7777+ "annotation": {
7878+ "type": "object",
7979+ "description": "A single annotation within an annotation layer.",
8080+ "required": ["label"],
8181+ "properties": {
8282+ "uuid": {
8383+ "type": "ref",
8484+ "ref": "pub.layers.defs#uuid",
8585+ "description": "Stable UUID for this annotation."
8686+ },
8787+ "anchor": {
8888+ "type": "ref",
8989+ "ref": "pub.layers.defs#anchor",
9090+ "description": "Position this annotation targets (text span, token, or temporal span)."
9191+ },
9292+ "label": {
9393+ "type": "string",
9494+ "description": "The annotation label (e.g. a POS tag, NER type, sentiment class)."
9595+ },
9696+ "text": {
9797+ "type": "string",
9898+ "description": "The surface text covered by this annotation (optional, for readability)."
9999+ },
100100+ "confidence": {
101101+ "type": "integer",
102102+ "description": "Confidence score in the range 0–1000 (0 = none, 1000 = certain).",
103103+ "minimum": 0,
104104+ "maximum": 1000
105105+ },
106106+ "features": {
107107+ "type": "ref",
108108+ "ref": "pub.layers.defs#featureMap",
109109+ "description": "Additional linguistic features for this annotation."
110110+ }
111111+ }
112112+ }
113113+ }
114114+}
+151
lexicons/pub/layers/defs.json
···11+{
22+ "lexicon": 1,
33+ "$type": "com.atproto.lexicon.schema",
44+ "id": "pub.layers.defs",
55+ "revision": 1,
66+ "description": "Shared type definitions for the layers.pub v0.5.0 lexicon family. Covers spans, temporal spans, UUIDs, token references, anchors, annotation metadata, features, and feature maps.",
77+ "defs": {
88+ "span": {
99+ "type": "object",
1010+ "description": "A byte-offset span into a text string, with optional character offsets.",
1111+ "required": ["byteStart", "byteEnd"],
1212+ "properties": {
1313+ "byteStart": {
1414+ "type": "integer",
1515+ "description": "Inclusive start byte offset."
1616+ },
1717+ "byteEnd": {
1818+ "type": "integer",
1919+ "description": "Exclusive end byte offset."
2020+ },
2121+ "charStart": {
2222+ "type": "integer",
2323+ "description": "Inclusive start Unicode character offset (optional)."
2424+ },
2525+ "charEnd": {
2626+ "type": "integer",
2727+ "description": "Exclusive end Unicode character offset (optional)."
2828+ }
2929+ }
3030+ },
3131+ "temporalSpan": {
3232+ "type": "object",
3333+ "description": "A time range in milliseconds, e.g. within a media stream.",
3434+ "required": ["start", "ending"],
3535+ "properties": {
3636+ "start": {
3737+ "type": "integer",
3838+ "description": "Start time in milliseconds."
3939+ },
4040+ "ending": {
4141+ "type": "integer",
4242+ "description": "End time in milliseconds."
4343+ }
4444+ }
4545+ },
4646+ "uuid": {
4747+ "type": "object",
4848+ "description": "A UUID value wrapper.",
4949+ "required": ["value"],
5050+ "properties": {
5151+ "value": {
5252+ "type": "string",
5353+ "description": "UUID string (e.g. RFC 4122)."
5454+ }
5555+ }
5656+ },
5757+ "tokenRef": {
5858+ "type": "object",
5959+ "description": "A reference to a specific token within a tokenization identified by UUID.",
6060+ "required": ["tokenizationId", "tokenIndex"],
6161+ "properties": {
6262+ "tokenizationId": {
6363+ "type": "ref",
6464+ "ref": "pub.layers.defs#uuid",
6565+ "description": "UUID identifying the tokenization this token belongs to."
6666+ },
6767+ "tokenIndex": {
6868+ "type": "integer",
6969+ "description": "Zero-based index of the token within the tokenization."
7070+ }
7171+ }
7272+ },
7373+ "anchor": {
7474+ "type": "object",
7575+ "description": "Polymorphic anchor pointing to a text span, a token, or a temporal span. At least one of textSpan, tokenRef, or temporalSpan should be set.",
7676+ "properties": {
7777+ "textSpan": {
7878+ "type": "ref",
7979+ "ref": "pub.layers.defs#span",
8080+ "description": "Byte-offset span into the expression text."
8181+ },
8282+ "tokenRef": {
8383+ "type": "ref",
8484+ "ref": "pub.layers.defs#tokenRef",
8585+ "description": "Reference to a specific token in a tokenization."
8686+ },
8787+ "temporalSpan": {
8888+ "type": "ref",
8989+ "ref": "pub.layers.defs#temporalSpan",
9090+ "description": "Time range within a media stream."
9191+ }
9292+ }
9393+ },
9494+ "annotationMetadata": {
9595+ "type": "object",
9696+ "description": "Provenance metadata for an annotation, annotation layer, or expression.",
9797+ "properties": {
9898+ "agent": {
9999+ "type": "string",
100100+ "format": "at-uri",
101101+ "description": "AT URI of the agent that produced this annotation (an at:// URI whose authority is the agent's DID or handle, not a bare DID)."
102102+ },
103103+ "tool": {
104104+ "type": "string",
105105+ "description": "Name or identifier of the tool or model used."
106106+ },
107107+ "timestamp": {
108108+ "type": "string",
109109+ "format": "datetime",
110110+ "description": "ISO 8601 datetime when the annotation was produced."
111111+ },
112112+ "confidence": {
113113+ "type": "integer",
114114+ "description": "Confidence score in the range 0–1000 (0 = none, 1000 = certain).",
115115+ "minimum": 0,
116116+ "maximum": 1000
117117+ }
118118+ }
119119+ },
120120+ "feature": {
121121+ "type": "object",
122122+ "description": "A key-value linguistic feature entry.",
123123+ "required": ["key", "value"],
124124+ "properties": {
125125+ "key": {
126126+ "type": "string",
127127+ "description": "Feature name (e.g. POS, Gender, Number)."
128128+ },
129129+ "value": {
130130+ "type": "string",
131131+ "description": "Feature value (e.g. NOUN, Masc, Sing)."
132132+ }
133133+ }
134134+ },
135135+ "featureMap": {
136136+ "type": "object",
137137+ "description": "A set of linguistic feature key-value pairs.",
138138+ "required": ["entries"],
139139+ "properties": {
140140+ "entries": {
141141+ "type": "array",
142142+ "items": {
143143+ "type": "ref",
144144+ "ref": "pub.layers.defs#feature"
145145+ },
146146+ "description": "List of feature entries."
147147+ }
148148+ }
149149+ }
150150+ }
151151+}
+75
lexicons/pub/layers/expression/expression.json
···11+{
22+ "lexicon": 1,
33+ "$type": "com.atproto.lexicon.schema",
44+ "id": "pub.layers.expression.expression",
55+ "revision": 1,
66+ "description": "A linguistic expression record — a unit of text at any granularity (document, transcript, paragraph, sentence, word, morpheme). Expressions may be nested via parentRef and anchored into media via mediaRef and anchor.",
77+ "defs": {
88+ "main": {
99+ "type": "record",
1010+ "key": "any",
1111+ "record": {
1212+ "type": "object",
1313+ "required": ["id", "kind", "text", "language", "createdAt"],
1414+ "properties": {
1515+ "id": {
1616+ "type": "string",
1717+ "description": "Stable identifier for this expression (e.g. UUID)."
1818+ },
1919+ "kindUri": {
2020+ "type": "string",
2121+ "format": "at-uri",
2222+ "description": "AT URI of a record that formally defines the expression kind."
2323+ },
2424+ "kind": {
2525+ "type": "string",
2626+ "description": "Expression granularity: document, transcript, paragraph, sentence, word, morpheme, etc."
2727+ },
2828+ "text": {
2929+ "type": "string",
3030+ "description": "The full text content of this expression."
3131+ },
3232+ "language": {
3333+ "type": "string",
3434+ "description": "BCP 47 language tag (e.g. en, fr, zh-Hant)."
3535+ },
3636+ "mediaRef": {
3737+ "type": "string",
3838+ "format": "at-uri",
3939+ "description": "AT URI of the media record this expression derives from (e.g. a video or audio record)."
4040+ },
4141+ "sourceRef": {
4242+ "type": "string",
4343+ "format": "at-uri",
4444+ "description": "AT URI of the source record from which this expression was produced (e.g. a transcript record)."
4545+ },
4646+ "parentRef": {
4747+ "type": "string",
4848+ "format": "at-uri",
4949+ "description": "AT URI of the parent expression, for nested structure (e.g. sentence within paragraph)."
5050+ },
5151+ "anchor": {
5252+ "type": "ref",
5353+ "ref": "pub.layers.defs#anchor",
5454+ "description": "Position of this expression within its parent text or media."
5555+ },
5656+ "metadata": {
5757+ "type": "ref",
5858+ "ref": "pub.layers.defs#annotationMetadata",
5959+ "description": "Provenance metadata for this expression."
6060+ },
6161+ "features": {
6262+ "type": "ref",
6363+ "ref": "pub.layers.defs#featureMap",
6464+ "description": "Linguistic features associated with this expression."
6565+ },
6666+ "createdAt": {
6767+ "type": "string",
6868+ "format": "datetime",
6969+ "description": "ISO 8601 datetime when this record was created."
7070+ }
7171+ }
7272+ }
7373+ }
7474+ }
7575+}