a textual notation to locate fields within atproto records (draft spec)
microcosm.tngl.io/RecordPath/
1// RecordPath — parser, matcher, enumerator
2// Reference implementation of the RecordPath draft spec
3
4use serde_json::Value;
5use std::collections::HashSet;
6
7// -- Error --
8
9/// A parse error from a RecordPath string.
10#[derive(Debug, Clone, thiserror::Error)]
11pub enum ParseError {
12 #[error("empty path")]
13 EmptyPath,
14
15 #[error("empty segment (position {position} in '{input}')")]
16 EmptySegment { input: String, position: usize },
17
18 #[error("escape at end of input (position {position} in '{input}')")]
19 EscapeAtEnd { input: String, position: usize },
20
21 #[error("escape followed by non-escapable '{ch}' (position {position} in '{input}')")]
22 InvalidEscape {
23 input: String,
24 position: usize,
25 ch: char,
26 },
27
28 #[error("unexpected '{ch}' without opening bracket (position {position} in '{input}')")]
29 UnexpectedClose {
30 input: String,
31 position: usize,
32 ch: char,
33 },
34
35 #[error("unclosed '{open}' (position {position} in '{input}')")]
36 Unclosed {
37 input: String,
38 position: usize,
39 open: char,
40 },
41
42 #[error("trailing dot (position {position} in '{input}')")]
43 TrailingDot { input: String, position: usize },
44}
45
46impl ParseError {
47 pub fn input(&self) -> &str {
48 match self {
49 Self::EmptyPath => "",
50 Self::EmptySegment { input, .. }
51 | Self::EscapeAtEnd { input, .. }
52 | Self::InvalidEscape { input, .. }
53 | Self::UnexpectedClose { input, .. }
54 | Self::Unclosed { input, .. }
55 | Self::TrailingDot { input, .. } => input,
56 }
57 }
58
59 pub fn position(&self) -> usize {
60 match self {
61 Self::EmptyPath => 0,
62 Self::EmptySegment { position, .. }
63 | Self::EscapeAtEnd { position, .. }
64 | Self::InvalidEscape { position, .. }
65 | Self::UnexpectedClose { position, .. }
66 | Self::Unclosed { position, .. }
67 | Self::TrailingDot { position, .. } => *position,
68 }
69 }
70
71 /// A corrected version of the input, if one can be inferred.
72 pub fn suggestion(&self) -> Option<String> {
73 let input = self.input();
74 let pos = self.position();
75 match self {
76 Self::EmptyPath | Self::EmptySegment { .. } => None,
77 Self::EscapeAtEnd { .. } => Some(format!("{}!!", &input[..pos])),
78 Self::InvalidEscape { .. } => {
79 Some(format!("{}!!{}", &input[..pos], &input[pos + 1..]))
80 }
81 Self::UnexpectedClose { ch, .. } => {
82 Some(format!("{}!{ch}{}", &input[..pos], &input[pos + 1..]))
83 }
84 Self::Unclosed { open, .. } => {
85 let close = if *open == '[' { ']' } else { '}' };
86 Some(format!("{input}{close}"))
87 }
88 Self::TrailingDot { .. } => Some(input[..input.len() - 1].to_string()),
89 }
90 }
91
92 /// Explanation of what the suggestion changes.
93 pub fn suggestion_hint(&self) -> Option<String> {
94 match self {
95 Self::EmptyPath | Self::EmptySegment { .. } => None,
96 Self::EscapeAtEnd { .. } | Self::InvalidEscape { .. } => {
97 Some("escape the '!' as '!!'".into())
98 }
99 Self::UnexpectedClose { ch, .. } => Some(format!("escape as '!{ch}'")),
100 Self::Unclosed { open, .. } => {
101 let close = if *open == '[' { ']' } else { '}' };
102 Some(format!("close with '{close}'"))
103 }
104 Self::TrailingDot { .. } => Some("remove the trailing dot".into()),
105 }
106 }
107}
108
109// -- Types --
110
111#[derive(Debug, Clone, PartialEq, Eq)]
112pub enum Qualifier {
113 Array,
114 ArrayUnion(String),
115 ScalarUnion(String),
116}
117
118#[derive(Debug, Clone, PartialEq, Eq)]
119pub struct Segment {
120 pub key: String,
121 pub qualifiers: Vec<Qualifier>,
122}
123
124#[derive(Debug, Clone, Copy, PartialEq, Eq)]
125pub enum PathType {
126 Scalar,
127 Vector,
128}
129
130#[derive(Debug, Clone, PartialEq, Eq)]
131pub struct PathInfo {
132 pub path: String,
133 pub path_type: PathType,
134}
135
136// -- Escape --
137
138fn is_structural(ch: char) -> bool {
139 matches!(ch, '.' | '[' | ']' | '{' | '}' | '!')
140}
141
142pub fn escape_field_name(key: &str) -> String {
143 let mut out = String::with_capacity(key.len());
144 for ch in key.chars() {
145 if is_structural(ch) {
146 out.push('!');
147 }
148 out.push(ch);
149 }
150 out
151}
152
153// -- Parser --
154
155pub fn parse(input: &str) -> Result<Vec<Segment>, ParseError> {
156 if input.is_empty() {
157 return Err(ParseError::EmptyPath);
158 }
159
160 let bytes = input.as_bytes();
161 let len = bytes.len();
162 let mut segments = Vec::new();
163 let mut i = 0;
164
165 while i < len {
166 let mut key = String::new();
167
168 while i < len && !matches!(bytes[i], b'.' | b'[' | b'{') {
169 match bytes[i] {
170 b'!' => {
171 if i + 1 >= len {
172 return Err(ParseError::EscapeAtEnd {
173 input: input.into(),
174 position: i,
175 });
176 }
177 let next = bytes[i + 1] as char;
178 if !is_structural(next) {
179 return Err(ParseError::InvalidEscape {
180 input: input.into(),
181 position: i,
182 ch: next,
183 });
184 }
185 key.push(next);
186 i += 2;
187 }
188 b']' | b'}' => {
189 return Err(ParseError::UnexpectedClose {
190 input: input.into(),
191 position: i,
192 ch: bytes[i] as char,
193 });
194 }
195 _ => {
196 // Consume a full UTF-8 character (structural chars are all
197 // ASCII, so non-ASCII bytes are always key content).
198 let start = i;
199 i += 1;
200 while i < len && (bytes[i] & 0xC0) == 0x80 {
201 i += 1;
202 }
203 key.push_str(&input[start..i]);
204 }
205 }
206 }
207
208 if key.is_empty() {
209 return Err(ParseError::EmptySegment {
210 input: input.into(),
211 position: i,
212 });
213 }
214
215 let mut qualifiers = Vec::new();
216 while i < len && matches!(bytes[i], b'[' | b'{') {
217 let open = bytes[i] as char;
218 let close = if open == '[' { b']' } else { b'}' };
219 let open_pos = i;
220 i += 1;
221 let content_start = i;
222 while i < len && bytes[i] != close {
223 i += 1;
224 }
225 if i >= len {
226 return Err(ParseError::Unclosed {
227 input: input.into(),
228 position: open_pos,
229 open,
230 });
231 }
232 let content = &input[content_start..i];
233 i += 1;
234
235 qualifiers.push(match (open, content.is_empty()) {
236 ('[', true) => Qualifier::Array,
237 ('[', false) => Qualifier::ArrayUnion(content.into()),
238 (_, _) => Qualifier::ScalarUnion(content.into()),
239 });
240 }
241
242 segments.push(Segment { key, qualifiers });
243
244 if i < len && bytes[i] == b'.' {
245 i += 1;
246 if i >= len {
247 return Err(ParseError::TrailingDot {
248 input: input.into(),
249 position: i - 1,
250 });
251 }
252 }
253 }
254
255 Ok(segments)
256}
257
258// -- Matcher --
259
260pub fn match_path(record: &Value, path: &str) -> Vec<Value> {
261 let Ok(segments) = parse(path) else {
262 return vec![];
263 };
264 match_segments(record, &segments, 0)
265}
266
267fn match_segments(data: &Value, segments: &[Segment], seg_idx: usize) -> Vec<Value> {
268 if seg_idx >= segments.len() {
269 return vec![data.clone()];
270 }
271 let seg = &segments[seg_idx];
272 let Some(obj) = data.as_object() else {
273 return vec![];
274 };
275 let Some(value) = obj.get(&seg.key) else {
276 return vec![];
277 };
278 apply_qualifiers(value, &seg.qualifiers, 0, segments, seg_idx)
279}
280
281fn apply_qualifiers(
282 value: &Value,
283 qualifiers: &[Qualifier],
284 qual_idx: usize,
285 segments: &[Segment],
286 seg_idx: usize,
287) -> Vec<Value> {
288 let Some(qual) = qualifiers.get(qual_idx) else {
289 // No more qualifiers — advance to next segment or collect
290 return if seg_idx + 1 >= segments.len() {
291 vec![value.clone()]
292 } else {
293 match_segments(value, segments, seg_idx + 1)
294 };
295 };
296
297 match qual {
298 Qualifier::ScalarUnion(nsid) => {
299 let type_matches = value
300 .as_object()
301 .and_then(|o| o.get("$type"))
302 .and_then(|t| t.as_str())
303 .is_some_and(|t| t == nsid);
304 if type_matches {
305 apply_qualifiers(value, qualifiers, qual_idx + 1, segments, seg_idx)
306 } else {
307 vec![]
308 }
309 }
310 Qualifier::Array | Qualifier::ArrayUnion(_) => {
311 let Some(arr) = value.as_array() else {
312 return vec![];
313 };
314 arr.iter()
315 .filter(|elem| match qual {
316 Qualifier::ArrayUnion(nsid) => elem
317 .as_object()
318 .and_then(|o| o.get("$type"))
319 .and_then(|t| t.as_str())
320 .is_some_and(|t| t == nsid),
321 _ => true,
322 })
323 .flat_map(|elem| {
324 apply_qualifiers(elem, qualifiers, qual_idx + 1, segments, seg_idx)
325 })
326 .collect()
327 }
328 }
329}
330
331// -- Enumerator --
332
333const DEFAULT_MAX_DEPTH: usize = 64;
334
335/// Returns a lazy iterator over all `(PathInfo, &Value)` pairs reachable from
336/// a record. Paths are deduplicated; each unique path is yielded once.
337pub fn enumerate(record: &Value) -> Paths<'_> {
338 Paths::new(record, DEFAULT_MAX_DEPTH)
339}
340
341/// Work items for the stack-based tree walk.
342enum Work<'a> {
343 /// Yield this path+value if not yet seen.
344 Emit {
345 path: String,
346 path_type: PathType,
347 value: &'a Value,
348 },
349 /// Expand an object's entries onto the stack.
350 Object {
351 obj: &'a serde_json::Map<String, Value>,
352 prefix: String,
353 is_vector: bool,
354 depth: usize,
355 },
356 /// Expand an array's elements onto the stack.
357 Array {
358 arr: &'a [Value],
359 arr_value: &'a Value,
360 prefix: String,
361 depth: usize,
362 },
363}
364
365pub struct Paths<'a> {
366 stack: Vec<Work<'a>>,
367 seen: HashSet<String>,
368 max_depth: usize,
369}
370
371impl<'a> Paths<'a> {
372 fn new(record: &'a Value, max_depth: usize) -> Self {
373 let mut paths = Self {
374 stack: Vec::new(),
375 seen: HashSet::new(),
376 max_depth,
377 };
378 if let Some(obj) = record.as_object() {
379 paths.stack.push(Work::Object {
380 obj,
381 prefix: String::new(),
382 is_vector: false,
383 depth: 0,
384 });
385 }
386 paths
387 }
388
389 pub fn with_max_depth(mut self, max_depth: usize) -> Self {
390 self.max_depth = max_depth;
391 self
392 }
393
394 fn expand_object(
395 &mut self,
396 obj: &'a serde_json::Map<String, Value>,
397 prefix: &str,
398 is_vector: bool,
399 depth: usize,
400 ) {
401 let vtype = if is_vector {
402 PathType::Vector
403 } else {
404 PathType::Scalar
405 };
406
407 // Push in reverse so the first key is at the top of the stack.
408 let entries: Vec<_> = obj.iter().collect();
409 for (key, child) in entries.into_iter().rev() {
410 let escaped = escape_field_name(key);
411 let key_path = if prefix.is_empty() {
412 escaped
413 } else {
414 format!("{prefix}.{escaped}")
415 };
416
417 // Push children first (deeper in stack), then the emit (top).
418 match child {
419 Value::Array(arr) => {
420 self.stack.push(Work::Array {
421 arr,
422 arr_value: child,
423 prefix: key_path.clone(),
424 depth: depth + 1,
425 });
426 self.stack.push(Work::Emit {
427 path: key_path,
428 path_type: vtype,
429 value: child,
430 });
431 }
432 Value::Object(child_obj) => {
433 match child_obj.get("$type").and_then(|t| t.as_str()) {
434 Some(nsid) => {
435 let qualified = format!("{key_path}{{{nsid}}}");
436 self.stack.push(Work::Object {
437 obj: child_obj,
438 prefix: qualified.clone(),
439 is_vector,
440 depth: depth + 1,
441 });
442 self.stack.push(Work::Emit {
443 path: qualified,
444 path_type: vtype,
445 value: child,
446 });
447 self.stack.push(Work::Emit {
448 path: key_path,
449 path_type: vtype,
450 value: child,
451 });
452 }
453 None => {
454 self.stack.push(Work::Object {
455 obj: child_obj,
456 prefix: key_path.clone(),
457 is_vector,
458 depth: depth + 1,
459 });
460 self.stack.push(Work::Emit {
461 path: key_path,
462 path_type: vtype,
463 value: child,
464 });
465 }
466 }
467 }
468 _ => {
469 self.stack.push(Work::Emit {
470 path: key_path,
471 path_type: vtype,
472 value: child,
473 });
474 }
475 }
476 }
477 }
478
479 fn expand_array(
480 &mut self,
481 arr: &'a [Value],
482 arr_value: &'a Value,
483 prefix: &str,
484 depth: usize,
485 ) {
486 let has_union = arr
487 .iter()
488 .any(|el| el.as_object().is_some_and(|o| o.contains_key("$type")));
489
490 if has_union {
491 let mut has_plain = false;
492
493 for el in arr.iter().rev() {
494 match el
495 .as_object()
496 .and_then(|o| o.get("$type"))
497 .and_then(|t| t.as_str())
498 {
499 Some(nsid) => {
500 let qp = format!("{prefix}[{nsid}]");
501 if let Some(obj) = el.as_object() {
502 self.stack.push(Work::Object {
503 obj,
504 prefix: qp.clone(),
505 is_vector: true,
506 depth: depth + 1,
507 });
508 }
509 self.stack.push(Work::Emit {
510 path: qp,
511 path_type: PathType::Vector,
512 value: el,
513 });
514 }
515 None => {
516 has_plain = true;
517 self.expand_child_value(el, &format!("{prefix}[]"), depth + 1);
518 }
519 }
520 }
521
522 if has_plain {
523 self.stack.push(Work::Emit {
524 path: format!("{prefix}[]"),
525 path_type: PathType::Vector,
526 value: arr_value,
527 });
528 }
529 } else {
530 let bare = format!("{prefix}[]");
531
532 for el in arr.iter().rev() {
533 self.expand_child_value(el, &bare, depth + 1);
534 }
535
536 self.stack.push(Work::Emit {
537 path: bare,
538 path_type: PathType::Vector,
539 value: arr_value,
540 });
541 }
542 }
543
544 fn expand_child_value(&mut self, value: &'a Value, prefix: &str, depth: usize) {
545 match value {
546 Value::Object(obj) => {
547 self.stack.push(Work::Object {
548 obj,
549 prefix: prefix.to_string(),
550 is_vector: true,
551 depth,
552 });
553 }
554 Value::Array(arr) => {
555 self.stack.push(Work::Array {
556 arr,
557 arr_value: value,
558 prefix: prefix.to_string(),
559 depth,
560 });
561 }
562 _ => {}
563 }
564 }
565}
566
567impl<'a> Iterator for Paths<'a> {
568 type Item = (PathInfo, &'a Value);
569
570 fn next(&mut self) -> Option<Self::Item> {
571 loop {
572 match self.stack.pop()? {
573 Work::Emit {
574 path,
575 path_type,
576 value,
577 } => {
578 if self.seen.insert(path.clone()) {
579 return Some((PathInfo { path, path_type }, value));
580 }
581 }
582 Work::Object {
583 obj,
584 prefix,
585 is_vector,
586 depth,
587 } => {
588 if depth <= self.max_depth {
589 self.expand_object(obj, &prefix, is_vector, depth);
590 }
591 }
592 Work::Array {
593 arr,
594 arr_value,
595 prefix,
596 depth,
597 } => {
598 if depth <= self.max_depth {
599 self.expand_array(arr, arr_value, &prefix, depth);
600 }
601 }
602 }
603 }
604 }
605}
606
607// -- is_vector --
608
609pub fn is_vector(path: &str) -> bool {
610 let bytes = path.as_bytes();
611 let mut i = 0;
612 while i < bytes.len() {
613 match bytes[i] {
614 b'!' if i + 1 < bytes.len() => i += 2,
615 b'[' => return true,
616 _ => i += 1,
617 }
618 }
619 false
620}