// RecordPath — parser, matcher, enumerator // Reference implementation of the RecordPath draft spec use serde_json::Value; use std::collections::HashSet; // -- Error -- /// A parse error from a RecordPath string. #[derive(Debug, Clone, thiserror::Error)] pub enum ParseError { #[error("empty path")] EmptyPath, #[error("empty segment (position {position} in '{input}')")] EmptySegment { input: String, position: usize }, #[error("escape at end of input (position {position} in '{input}')")] EscapeAtEnd { input: String, position: usize }, #[error("escape followed by non-escapable '{ch}' (position {position} in '{input}')")] InvalidEscape { input: String, position: usize, ch: char, }, #[error("unexpected '{ch}' without opening bracket (position {position} in '{input}')")] UnexpectedClose { input: String, position: usize, ch: char, }, #[error("unclosed '{open}' (position {position} in '{input}')")] Unclosed { input: String, position: usize, open: char, }, #[error("trailing dot (position {position} in '{input}')")] TrailingDot { input: String, position: usize }, } impl ParseError { pub fn input(&self) -> &str { match self { Self::EmptyPath => "", Self::EmptySegment { input, .. } | Self::EscapeAtEnd { input, .. } | Self::InvalidEscape { input, .. } | Self::UnexpectedClose { input, .. } | Self::Unclosed { input, .. } | Self::TrailingDot { input, .. } => input, } } pub fn position(&self) -> usize { match self { Self::EmptyPath => 0, Self::EmptySegment { position, .. } | Self::EscapeAtEnd { position, .. } | Self::InvalidEscape { position, .. } | Self::UnexpectedClose { position, .. } | Self::Unclosed { position, .. } | Self::TrailingDot { position, .. } => *position, } } /// A corrected version of the input, if one can be inferred. pub fn suggestion(&self) -> Option { let input = self.input(); let pos = self.position(); match self { Self::EmptyPath | Self::EmptySegment { .. } => None, Self::EscapeAtEnd { .. } => Some(format!("{}!!", &input[..pos])), Self::InvalidEscape { .. } => { Some(format!("{}!!{}", &input[..pos], &input[pos + 1..])) } Self::UnexpectedClose { ch, .. } => { Some(format!("{}!{ch}{}", &input[..pos], &input[pos + 1..])) } Self::Unclosed { open, .. } => { let close = if *open == '[' { ']' } else { '}' }; Some(format!("{input}{close}")) } Self::TrailingDot { .. } => Some(input[..input.len() - 1].to_string()), } } /// Explanation of what the suggestion changes. pub fn suggestion_hint(&self) -> Option { match self { Self::EmptyPath | Self::EmptySegment { .. } => None, Self::EscapeAtEnd { .. } | Self::InvalidEscape { .. } => { Some("escape the '!' as '!!'".into()) } Self::UnexpectedClose { ch, .. } => Some(format!("escape as '!{ch}'")), Self::Unclosed { open, .. } => { let close = if *open == '[' { ']' } else { '}' }; Some(format!("close with '{close}'")) } Self::TrailingDot { .. } => Some("remove the trailing dot".into()), } } } // -- Types -- #[derive(Debug, Clone, PartialEq, Eq)] pub enum Qualifier { Array, ArrayUnion(String), ScalarUnion(String), } #[derive(Debug, Clone, PartialEq, Eq)] pub struct Segment { pub key: String, pub qualifiers: Vec, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum PathType { Scalar, Vector, } #[derive(Debug, Clone, PartialEq, Eq)] pub struct PathInfo { pub path: String, pub path_type: PathType, } // -- Escape -- fn is_structural(ch: char) -> bool { matches!(ch, '.' | '[' | ']' | '{' | '}' | '!') } pub fn escape_field_name(key: &str) -> String { let mut out = String::with_capacity(key.len()); for ch in key.chars() { if is_structural(ch) { out.push('!'); } out.push(ch); } out } // -- Parser -- pub fn parse(input: &str) -> Result, ParseError> { if input.is_empty() { return Err(ParseError::EmptyPath); } let bytes = input.as_bytes(); let len = bytes.len(); let mut segments = Vec::new(); let mut i = 0; while i < len { let mut key = String::new(); while i < len && !matches!(bytes[i], b'.' | b'[' | b'{') { match bytes[i] { b'!' => { if i + 1 >= len { return Err(ParseError::EscapeAtEnd { input: input.into(), position: i, }); } let next = bytes[i + 1] as char; if !is_structural(next) { return Err(ParseError::InvalidEscape { input: input.into(), position: i, ch: next, }); } key.push(next); i += 2; } b']' | b'}' => { return Err(ParseError::UnexpectedClose { input: input.into(), position: i, ch: bytes[i] as char, }); } _ => { // Consume a full UTF-8 character (structural chars are all // ASCII, so non-ASCII bytes are always key content). let start = i; i += 1; while i < len && (bytes[i] & 0xC0) == 0x80 { i += 1; } key.push_str(&input[start..i]); } } } if key.is_empty() { return Err(ParseError::EmptySegment { input: input.into(), position: i, }); } let mut qualifiers = Vec::new(); while i < len && matches!(bytes[i], b'[' | b'{') { let open = bytes[i] as char; let close = if open == '[' { b']' } else { b'}' }; let open_pos = i; i += 1; let content_start = i; while i < len && bytes[i] != close { i += 1; } if i >= len { return Err(ParseError::Unclosed { input: input.into(), position: open_pos, open, }); } let content = &input[content_start..i]; i += 1; qualifiers.push(match (open, content.is_empty()) { ('[', true) => Qualifier::Array, ('[', false) => Qualifier::ArrayUnion(content.into()), (_, _) => Qualifier::ScalarUnion(content.into()), }); } segments.push(Segment { key, qualifiers }); if i < len && bytes[i] == b'.' { i += 1; if i >= len { return Err(ParseError::TrailingDot { input: input.into(), position: i - 1, }); } } } Ok(segments) } // -- Matcher -- pub fn match_path(record: &Value, path: &str) -> Vec { let Ok(segments) = parse(path) else { return vec![]; }; match_segments(record, &segments, 0) } fn match_segments(data: &Value, segments: &[Segment], seg_idx: usize) -> Vec { if seg_idx >= segments.len() { return vec![data.clone()]; } let seg = &segments[seg_idx]; let Some(obj) = data.as_object() else { return vec![]; }; let Some(value) = obj.get(&seg.key) else { return vec![]; }; apply_qualifiers(value, &seg.qualifiers, 0, segments, seg_idx) } fn apply_qualifiers( value: &Value, qualifiers: &[Qualifier], qual_idx: usize, segments: &[Segment], seg_idx: usize, ) -> Vec { let Some(qual) = qualifiers.get(qual_idx) else { // No more qualifiers — advance to next segment or collect return if seg_idx + 1 >= segments.len() { vec![value.clone()] } else { match_segments(value, segments, seg_idx + 1) }; }; match qual { Qualifier::ScalarUnion(nsid) => { let type_matches = value .as_object() .and_then(|o| o.get("$type")) .and_then(|t| t.as_str()) .is_some_and(|t| t == nsid); if type_matches { apply_qualifiers(value, qualifiers, qual_idx + 1, segments, seg_idx) } else { vec![] } } Qualifier::Array | Qualifier::ArrayUnion(_) => { let Some(arr) = value.as_array() else { return vec![]; }; arr.iter() .filter(|elem| match qual { Qualifier::ArrayUnion(nsid) => elem .as_object() .and_then(|o| o.get("$type")) .and_then(|t| t.as_str()) .is_some_and(|t| t == nsid), _ => true, }) .flat_map(|elem| { apply_qualifiers(elem, qualifiers, qual_idx + 1, segments, seg_idx) }) .collect() } } } // -- Enumerator -- const DEFAULT_MAX_DEPTH: usize = 64; /// Returns a lazy iterator over all `(PathInfo, &Value)` pairs reachable from /// a record. Paths are deduplicated; each unique path is yielded once. pub fn enumerate(record: &Value) -> Paths<'_> { Paths::new(record, DEFAULT_MAX_DEPTH) } /// Work items for the stack-based tree walk. enum Work<'a> { /// Yield this path+value if not yet seen. Emit { path: String, path_type: PathType, value: &'a Value, }, /// Expand an object's entries onto the stack. Object { obj: &'a serde_json::Map, prefix: String, is_vector: bool, depth: usize, }, /// Expand an array's elements onto the stack. Array { arr: &'a [Value], arr_value: &'a Value, prefix: String, depth: usize, }, } pub struct Paths<'a> { stack: Vec>, seen: HashSet, max_depth: usize, } impl<'a> Paths<'a> { fn new(record: &'a Value, max_depth: usize) -> Self { let mut paths = Self { stack: Vec::new(), seen: HashSet::new(), max_depth, }; if let Some(obj) = record.as_object() { paths.stack.push(Work::Object { obj, prefix: String::new(), is_vector: false, depth: 0, }); } paths } pub fn with_max_depth(mut self, max_depth: usize) -> Self { self.max_depth = max_depth; self } fn expand_object( &mut self, obj: &'a serde_json::Map, prefix: &str, is_vector: bool, depth: usize, ) { let vtype = if is_vector { PathType::Vector } else { PathType::Scalar }; // Push in reverse so the first key is at the top of the stack. let entries: Vec<_> = obj.iter().collect(); for (key, child) in entries.into_iter().rev() { let escaped = escape_field_name(key); let key_path = if prefix.is_empty() { escaped } else { format!("{prefix}.{escaped}") }; // Push children first (deeper in stack), then the emit (top). match child { Value::Array(arr) => { self.stack.push(Work::Array { arr, arr_value: child, prefix: key_path.clone(), depth: depth + 1, }); self.stack.push(Work::Emit { path: key_path, path_type: vtype, value: child, }); } Value::Object(child_obj) => { match child_obj.get("$type").and_then(|t| t.as_str()) { Some(nsid) => { let qualified = format!("{key_path}{{{nsid}}}"); self.stack.push(Work::Object { obj: child_obj, prefix: qualified.clone(), is_vector, depth: depth + 1, }); self.stack.push(Work::Emit { path: qualified, path_type: vtype, value: child, }); self.stack.push(Work::Emit { path: key_path, path_type: vtype, value: child, }); } None => { self.stack.push(Work::Object { obj: child_obj, prefix: key_path.clone(), is_vector, depth: depth + 1, }); self.stack.push(Work::Emit { path: key_path, path_type: vtype, value: child, }); } } } _ => { self.stack.push(Work::Emit { path: key_path, path_type: vtype, value: child, }); } } } } fn expand_array( &mut self, arr: &'a [Value], arr_value: &'a Value, prefix: &str, depth: usize, ) { let has_union = arr .iter() .any(|el| el.as_object().is_some_and(|o| o.contains_key("$type"))); if has_union { let mut has_plain = false; for el in arr.iter().rev() { match el .as_object() .and_then(|o| o.get("$type")) .and_then(|t| t.as_str()) { Some(nsid) => { let qp = format!("{prefix}[{nsid}]"); if let Some(obj) = el.as_object() { self.stack.push(Work::Object { obj, prefix: qp.clone(), is_vector: true, depth: depth + 1, }); } self.stack.push(Work::Emit { path: qp, path_type: PathType::Vector, value: el, }); } None => { has_plain = true; self.expand_child_value(el, &format!("{prefix}[]"), depth + 1); } } } if has_plain { self.stack.push(Work::Emit { path: format!("{prefix}[]"), path_type: PathType::Vector, value: arr_value, }); } } else { let bare = format!("{prefix}[]"); for el in arr.iter().rev() { self.expand_child_value(el, &bare, depth + 1); } self.stack.push(Work::Emit { path: bare, path_type: PathType::Vector, value: arr_value, }); } } fn expand_child_value(&mut self, value: &'a Value, prefix: &str, depth: usize) { match value { Value::Object(obj) => { self.stack.push(Work::Object { obj, prefix: prefix.to_string(), is_vector: true, depth, }); } Value::Array(arr) => { self.stack.push(Work::Array { arr, arr_value: value, prefix: prefix.to_string(), depth, }); } _ => {} } } } impl<'a> Iterator for Paths<'a> { type Item = (PathInfo, &'a Value); fn next(&mut self) -> Option { loop { match self.stack.pop()? { Work::Emit { path, path_type, value, } => { if self.seen.insert(path.clone()) { return Some((PathInfo { path, path_type }, value)); } } Work::Object { obj, prefix, is_vector, depth, } => { if depth <= self.max_depth { self.expand_object(obj, &prefix, is_vector, depth); } } Work::Array { arr, arr_value, prefix, depth, } => { if depth <= self.max_depth { self.expand_array(arr, arr_value, &prefix, depth); } } } } } } // -- is_vector -- pub fn is_vector(path: &str) -> bool { let bytes = path.as_bytes(); let mut i = 0; while i < bytes.len() { match bytes[i] { b'!' if i + 1 < bytes.len() => i += 2, b'[' => return true, _ => i += 1, } } false }