// RecordPath — a textual notation to locate fields within atproto records (draft spec)
// Spec: https://microcosm.tngl.io/RecordPath/
//
// Parser, matcher, and enumerator: reference implementation of the RecordPath draft spec.
3
// Characters with structural meaning in a path: . [ ] { } and the escape
// character ! itself. A field name containing any of them must escape it as
// `!<char>`. Iterating the string yields one set member per character.
const STRUCTURAL = new Set<string>('.[]{}!');
5
/**
 * Error raised when a RecordPath string cannot be parsed.
 *
 * The error message embeds the position and the offending input; the same
 * details are also exposed as fields so callers can build their own report.
 */
export class RecordPathParseError extends Error {
  /** The input string that failed to parse. */
  readonly input: string;
  /** Character position at which the problem was detected. */
  readonly position: number;
  /** A corrected version of the input, or `null` when none was supplied. */
  readonly suggestion: string | null;
  /** Explanation of what the suggestion changes, or `null` when none was supplied. */
  readonly suggestionHint: string | null;

  /**
   * @param message - short description of the problem (without location details)
   * @param input - the full string that was being parsed
   * @param position - character position where the problem was detected
   * @param suggestion - optional corrected input
   * @param suggestionHint - optional explanation of the correction
   */
  constructor(
    message: string,
    input: string,
    position: number,
    suggestion?: string,
    suggestionHint?: string
  ) {
    super(message + ' (position ' + position + " in '" + input + "')");
    this.name = 'RecordPathParseError';
    this.position = position;
    this.input = input;
    // Missing optional arguments are normalised to null.
    this.suggestionHint = suggestionHint != null ? suggestionHint : null;
    this.suggestion = suggestion != null ? suggestion : null;
  }
}
32
/** A single qualifier attached to a path segment. */
export interface Qualifier {
  /**
   * Qualifier form. `parse` produces 'array' for `[]`, 'arrayUnion' for
   * `[nsid]`, and 'scalarUnion' for `{nsid}`.
   */
  type: 'array' | 'arrayUnion' | 'scalarUnion';
  /** Text found between the brackets/braces; not set for a bare `[]`. */
  nsid?: string;
}
37
/** One dot-separated step of a parsed path. */
export interface Segment {
  /** Field name with escape sequences already resolved (unescaped). */
  key: string;
  /** Qualifiers that followed the field name, in the order they were written. */
  qualifiers: Qualifier[];
}
42
/** Describes one path reported by `enumerate`. */
export interface PathInfo {
  /** The path text, with field names escaped. */
  path: string;
  /**
   * 'vector' for paths that end in, or descend through, an array qualifier;
   * 'scalar' otherwise.
   */
  type: 'scalar' | 'vector';
}
47
/**
 * Escape a raw field name for use as a path segment.
 *
 * Every structural character in the name is prefixed with `!`; all other
 * characters are copied unchanged.
 *
 * @param key - the raw field name
 * @returns the escaped field name
 */
export function escapeFieldName(key: string): string {
  // Array.from walks the string by code point, like for...of.
  return Array.from(key, (ch) => (STRUCTURAL.has(ch) ? '!' + ch : ch)).join('');
}
56
/**
 * Parse a RecordPath string into an ordered list of segments.
 *
 * A path is one or more segments separated by `.`. Each segment is a field
 * name (structural characters escaped as `!<char>`) followed by zero or more
 * qualifiers: `[]` (array), `[nsid]` (array union) or `{nsid}` (scalar union).
 *
 * @param str - the RecordPath text to parse
 * @returns the parsed segments, in path order
 * @throws RecordPathParseError for an empty path, an empty segment, a dangling
 *   or invalid escape, a stray `]` / `}`, an unclosed qualifier, a trailing
 *   dot, or a field name that follows a qualifier without a separating `.`
 */
export function parse(str: string): Segment[] {
  if (str === '') throw new RecordPathParseError('empty path', str, 0);

  const segments: Segment[] = [];
  let i = 0;

  while (i < str.length) {
    // ---- field name: runs until an unescaped '.', '[' or '{' ----
    let key = '';

    while (i < str.length && str[i] !== '.' && str[i] !== '[' && str[i] !== '{') {
      if (str[i] === '!') {
        if (i + 1 >= str.length) {
          throw new RecordPathParseError(
            'escape at end of input',
            str,
            i,
            str.slice(0, i) + '!!',
            "escape the '!' as '!!'"
          );
        }
        const next = str[i + 1];
        if (!STRUCTURAL.has(next)) {
          throw new RecordPathParseError(
            `escape followed by non-escapable '${next}'`,
            str,
            i,
            str.slice(0, i) + '!!' + str.slice(i + 1),
            "escape the '!' as '!!'"
          );
        }
        // Valid escape: keep the escaped character, drop the '!'.
        key += next;
        i += 2;
      } else if (str[i] === ']') {
        throw new RecordPathParseError(
          "unexpected ']' without opening '['",
          str,
          i,
          str.slice(0, i) + '!]' + str.slice(i + 1),
          "escape as '!]'"
        );
      } else if (str[i] === '}') {
        throw new RecordPathParseError(
          "unexpected '}' without opening '{'",
          str,
          i,
          str.slice(0, i) + '!}' + str.slice(i + 1),
          "escape as '!}'"
        );
      } else {
        key += str[i];
        i++;
      }
    }

    if (key === '') {
      throw new RecordPathParseError('empty segment', str, i);
    }

    // ---- qualifiers: any number of [...] / {...} groups ----
    const qualifiers: Qualifier[] = [];
    while (i < str.length && (str[i] === '[' || str[i] === '{')) {
      const open = str[i];
      const close = open === '[' ? ']' : '}';
      const openPos = i;
      i++;
      // Qualifier content is taken verbatim up to the matching closer; escape
      // sequences are not interpreted here.
      let content = '';
      while (i < str.length && str[i] !== close) {
        content += str[i];
        i++;
      }
      if (i >= str.length) {
        throw new RecordPathParseError(
          `unclosed '${open}'`,
          str,
          openPos,
          str + close,
          `close with '${close}'`
        );
      }
      i++; // skip close

      if (open === '[') {
        qualifiers.push(
          content === '' ? { type: 'array' } : { type: 'arrayUnion', nsid: content }
        );
      } else {
        qualifiers.push({ type: 'scalarUnion', nsid: content });
      }
    }

    segments.push({ key, qualifiers });

    if (i >= str.length) break;

    if (str[i] === '.') {
      i++;
      if (i >= str.length) {
        throw new RecordPathParseError(
          'trailing dot',
          str,
          i - 1,
          str.slice(0, -1),
          'remove the trailing dot'
        );
      }
      continue;
    }

    // Only reachable directly after a qualifier: the field-name scan stops
    // solely at '.', '[' or '{', and '[' / '{' are consumed above. Anything
    // else here is a new field name glued onto the qualifier (e.g. "a[]b"),
    // which would otherwise be silently read as "a[].b". A stray ']' or '}'
    // is left for the next iteration so it gets its more specific error.
    if (str[i] !== ']' && str[i] !== '}') {
      throw new RecordPathParseError(
        "missing '.' after qualifier",
        str,
        i,
        str.slice(0, i) + '.' + str.slice(i),
        "separate segments with '.'"
      );
    }
  }

  return segments;
}
164
/**
 * Match a RecordPath against a record and collect every matched value.
 *
 * This is a best-effort lookup: a missing, empty or whitespace-only path, and
 * any error raised while parsing or matching, all produce an empty result
 * instead of throwing.
 *
 * @param record - the record to search
 * @param pathStr - the RecordPath text
 * @returns all values the path resolves to; empty when nothing matches
 */
export function match(record: Record<string, unknown>, pathStr: string): unknown[] {
  if (!pathStr) return [];
  if (pathStr.trim() === '') return [];

  try {
    return matchSegments(record, parse(pathStr), 0);
  } catch {
    return [];
  }
}
175
/**
 * Resolve the segments from `segIdx` onward against `data`.
 *
 * @param data - the value the current segment is looked up in
 * @param segments - the full parsed path
 * @param segIdx - index of the segment to resolve next
 * @returns every value the remaining segments resolve to; `[data]` when no
 *   segments remain, and empty when `data` is not a plain object or lacks the
 *   field
 */
function matchSegments(data: unknown, segments: Segment[], segIdx: number): unknown[] {
  if (segIdx >= segments.length) return [data];
  if (typeof data !== 'object' || data === null || Array.isArray(data)) return [];

  const seg = segments[segIdx];
  // Only the record's own fields count. A plain `obj[key]` lookup would also
  // find inherited members, so paths such as "constructor" or "toString"
  // would match on any object.
  if (!Object.prototype.hasOwnProperty.call(data, seg.key)) return [];

  const value = (data as Record<string, unknown>)[seg.key];
  if (value === undefined) return [];
  return applyQualifiers(value, seg.qualifiers, 0, segments, segIdx);
}
185
/**
 * Apply the qualifiers of the current segment, starting at `qualIdx`, to
 * `value`, then continue with the following segment.
 *
 * @param value - the value the current qualifier is applied to
 * @param qualifiers - qualifiers of the current segment
 * @param qualIdx - index of the qualifier to apply next
 * @param segments - the full parsed path
 * @param segIdx - index of the segment the qualifiers belong to
 * @returns every value that satisfies the remaining qualifiers and segments
 */
function applyQualifiers(
  value: unknown,
  qualifiers: Qualifier[],
  qualIdx: number,
  segments: Segment[],
  segIdx: number
): unknown[] {
  // All qualifiers consumed: finish here or move on to the next segment.
  if (qualIdx >= qualifiers.length) {
    return segIdx + 1 >= segments.length
      ? [value]
      : matchSegments(value, segments, segIdx + 1);
  }

  const current = qualifiers[qualIdx];
  const proceed = (v: unknown): unknown[] =>
    applyQualifiers(v, qualifiers, qualIdx + 1, segments, segIdx);

  switch (current.type) {
    case 'scalarUnion': {
      // Must be a non-array object whose $type equals the requested nsid.
      const isObject = typeof value === 'object' && value !== null && !Array.isArray(value);
      if (!isObject) return [];
      const tagged = value as Record<string, unknown>;
      return tagged.$type === current.nsid ? proceed(value) : [];
    }

    case 'array':
    case 'arrayUnion': {
      if (!Array.isArray(value)) return [];
      const filterByType = current.type === 'arrayUnion';
      const collected: unknown[] = [];
      for (const item of value) {
        if (filterByType) {
          // Keep only object elements tagged with the requested nsid.
          const keep =
            typeof item === 'object' &&
            item !== null &&
            (item as Record<string, unknown>).$type === current.nsid;
          if (!keep) continue;
        }
        collected.push(...proceed(item));
      }
      return collected;
    }

    default:
      return [];
  }
}
222
/**
 * Enumerate the RecordPaths reachable from a record.
 *
 * @param record - the record to walk
 * @returns a generator of `[PathInfo, value]` pairs; each path is reported at
 *   most once
 */
export function* enumerate(
  record: Record<string, unknown>
): Generator<[PathInfo, unknown]> {
  // One shared set of already-reported paths for the whole walk.
  const reported = new Set<string>();
  const atRoot = '';
  yield* enumObject(reported, record, atRoot, false);
}
231
/**
 * Enumerate the paths of every field of `obj`, recursing into nested values.
 *
 * Each field first reports its own path. Arrays are then expanded via
 * `enumArray`. Nested objects carrying a non-empty string `$type` also report
 * a `{nsid}`-qualified path and are walked under that qualified prefix; other
 * nested objects are walked under the plain field path.
 *
 * @param seen - paths already reported; updated as new paths are yielded
 * @param obj - the object whose fields are enumerated
 * @param prefix - path of `obj` itself ('' at the record root)
 * @param isVector - whether `obj` was reached through an array
 * @returns a generator of `[PathInfo, value]` pairs for paths not yet in `seen`
 */
function* enumObject(
  seen: Set<string>,
  obj: Record<string, unknown>,
  prefix: string,
  isVector: boolean
): Generator<[PathInfo, unknown]> {
  const vtype = isVector ? 'vector' : 'scalar';

  // Returns true exactly once per path and records it as reported.
  const firstVisit = (path: string): boolean => {
    if (seen.has(path)) return false;
    seen.add(path);
    return true;
  };

  for (const key of Object.keys(obj)) {
    const child = obj[key];
    const escaped = escapeFieldName(key);
    const keyPath = prefix ? prefix + '.' + escaped : escaped;

    // Every field reports its plain path, whatever its value is.
    if (firstVisit(keyPath)) {
      yield [{ path: keyPath, type: vtype }, child];
    }

    // Primitives, null and undefined have nothing below them.
    if (child === null || typeof child !== 'object') continue;

    if (Array.isArray(child)) {
      yield* enumArray(seen, child, keyPath);
      continue;
    }

    const fields = child as Record<string, unknown>;
    const nsid = fields.$type;
    // Only a non-empty *string* $type marks a union member. A non-string value
    // would be stringified into the path (e.g. "a{5}") and could never be
    // matched, because matching compares $type against the path's string.
    if (typeof nsid === 'string' && nsid !== '') {
      const qualified = keyPath + '{' + nsid + '}';
      if (firstVisit(qualified)) {
        yield [{ path: qualified, type: vtype }, child];
      }
      yield* enumObject(seen, fields, qualified, isVector);
    } else {
      yield* enumObject(seen, fields, keyPath, isVector);
    }
  }
}
277
/**
 * Enumerate the paths contributed by the elements of an array.
 *
 * When no element is a union member (a non-array object with a non-empty
 * string `$type`), the bare `prefix[]` path is reported with the whole array
 * and each element is expanded beneath it. Otherwise every union member
 * reports `prefix[nsid]` with that element and is walked under it; remaining
 * elements are expanded under `prefix[]`, and `prefix[]` itself is reported
 * (with the whole array) after the elements.
 *
 * @param seen - paths already reported; updated as new paths are yielded
 * @param arr - the array to enumerate
 * @param prefix - path of the array field, without any `[]` qualifier
 * @returns a generator of `[PathInfo, value]` pairs for paths not yet in `seen`
 */
function* enumArray(
  seen: Set<string>,
  arr: unknown[],
  prefix: string
): Generator<[PathInfo, unknown]> {
  // Returns true exactly once per path and records it as reported.
  const firstVisit = (path: string): boolean => {
    if (seen.has(path)) return false;
    seen.add(path);
    return true;
  };

  // The element's union type, or undefined when it is not a union member.
  // Only a non-empty *string* $type qualifies: a non-string value would be
  // stringified into the path (e.g. "a[5]") and could never be matched,
  // because matching compares $type against the path's string.
  const unionTypeOf = (el: unknown): string | undefined => {
    if (typeof el !== 'object' || el === null || Array.isArray(el)) return undefined;
    const t = (el as Record<string, unknown>).$type;
    return typeof t === 'string' && t !== '' ? t : undefined;
  };

  const bare = prefix + '[]';
  const hasUnion = arr.some((el) => unionTypeOf(el) !== undefined);

  if (!hasUnion) {
    if (firstVisit(bare)) {
      yield [{ path: bare, type: 'vector' }, arr];
    }
    for (const el of arr) {
      yield* enumValue(seen, el, bare);
    }
    return;
  }

  let hasPlain = false;
  for (const el of arr) {
    const nsid = unionTypeOf(el);
    if (nsid !== undefined) {
      const qp = prefix + '[' + nsid + ']';
      if (firstVisit(qp)) {
        yield [{ path: qp, type: 'vector' }, el];
      }
      // unionTypeOf only returns a type for non-array objects.
      yield* enumObject(seen, el as Record<string, unknown>, qp, true);
    } else {
      hasPlain = true;
      yield* enumValue(seen, el, bare);
    }
  }

  // In a mixed array the bare path is reported last, and only if some element
  // was not a union member.
  if (hasPlain && firstVisit(bare)) {
    yield [{ path: bare, type: 'vector' }, arr];
  }
}
330
/**
 * Enumerate the paths below a single array element.
 *
 * Primitives, `null` and `undefined` contribute nothing. Arrays are delegated
 * to `enumArray`; any other object is delegated to `enumObject` and marked as
 * reached through an array.
 *
 * @param seen - paths already reported
 * @param value - the element to expand
 * @param prefix - path under which the element's contents are reported
 * @returns a generator of `[PathInfo, value]` pairs
 */
function* enumValue(
  seen: Set<string>,
  value: unknown,
  prefix: string
): Generator<[PathInfo, unknown]> {
  // typeof undefined is 'undefined', so this also filters undefined.
  const isContainer = typeof value === 'object' && value !== null;
  if (!isContainer) return;

  yield* Array.isArray(value)
    ? enumArray(seen, value, prefix)
    : enumObject(seen, value as Record<string, unknown>, prefix, true);
}
345
/**
 * Report whether a path string contains an unescaped `[`.
 *
 * The string is scanned directly, without parsing; a `!` followed by another
 * character is treated as an escape and both characters are skipped.
 *
 * @param pathStr - the RecordPath text to inspect
 * @returns `true` if an unescaped `[` is found, otherwise `false`
 */
export function isVector(pathStr: string): boolean {
  let pos = 0;
  while (pos < pathStr.length) {
    const ch = pathStr[pos];
    if (ch === '!' && pos + 1 < pathStr.length) {
      pos += 2; // skip the escape marker and the escaped character
      continue;
    }
    if (ch === '[') return true;
    pos += 1;
  }
  return false;
}