From f5514c9aca88cafb7d690900716193982181a971 Mon Sep 17 00:00:00 2001 From: Marko Mikulicic Date: Tue, 6 Oct 2020 15:46:16 +0200 Subject: [PATCH 1/7] Play with explicit AST --- src/ast.rs | 143 ++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 + src/parser_ast.rs | 131 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 277 insertions(+) create mode 100644 src/ast.rs create mode 100644 src/parser_ast.rs diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..4b11494 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,143 @@ +/* + * Copyright 2020 VMware, Inc. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +use serde_json::Value; + +/// A path is a tree of selector nodes. +/// +/// For example, the JSONPath `$.foo.bar` yields this AST: +/// +/// ```text +/// * +/// / \ +/// * \___ DotName("bar") +/// / \ +/// * \___ DotName("foo") +/// / +/// Root ___/ +/// ``` +/// +/// A more complicated example: `$.foo[1,2]["bar"]`: +/// +/// ```text +/// * +/// / \ +/// * \___ Union +/// / \ \ +/// * \___ Union \ +/// / \ [Field("bar")] +/// * \ +/// / \ [Number(1), Number(2)] +/// / \ +/// Root ___/ \___ DotName("foo") +/// ``` +/// +#[derive(Debug)] +pub enum Path { + Root, + Sel(Box, Selector), +} + +#[derive(Debug)] +pub enum Selector { + Union(Vec), + DotName(String), + DotWildcard, +} + +#[derive(Debug)] +pub enum Index { + Field(String), + Number(i64), +} + +type Iter<'a> = Box + 'a>; + +impl Path { + pub fn find<'a>(&'a self, input: &'a Value) -> Iter<'a> { + match self { + Path::Root => Box::new(std::iter::once(input)), + Path::Sel(left, sel) => Box::new(left.find(input).flat_map(move |v| sel.find(v))), + } + } +} + +impl Selector { + pub fn find<'a>(&'a self, input: &'a Value) -> Iter<'a> { + match self { + Selector::Union(indices) => Box::new(indices.iter().flat_map(move |i| i.get(input))), + Selector::DotName(name) => Box::new(input.get(name).into_iter()), + Selector::DotWildcard => match input { + Value::Object(m) => Box::new(m.values()), + Value::Array(a) => Box::new(a.iter()), + _ => Box::new(std::iter::empty()), + }, + } + } +} + +impl Index { + pub fn get<'a>(&self, v: &'a Value) -> Iter<'a> { + match self { + Index::Field(name) => Box::new(v.get(name).into_iter()), + Index::Number(num) => Box::new(v.get(abs_index(*num, v)).into_iter()), + } + } +} + +fn abs_index(index: i64, node: &Value) -> usize { + if index >= 0 { + index as usize + } else { + let len = if let Value::Array(a) = node { + a.len() as i64 + } else { + 0 + }; + (len + index) as usize + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::parser_ast::parse; + use serde_json::json; + + #[test] + fn demo() { + let a1 = Path::Sel( + Box::new(Path::Root), + Selector::DotName("foo".to_owned()), + ); + let a2 = Path::Sel(Box::new(a1), Selector::DotName("bar".to_owned())); + let a3 = Path::Sel( + Box::new(a2), + Selector::Union(vec![Index::Field("baz".to_owned())]), + ); + let a4 = Path::Sel(Box::new(a3), Selector::Union(vec![Index::Number(4)])); + + let j = json!({"foo":{"bar":{"baz":[10,20,30,40,50,60]}}}); + println!("j: {}", j); + + let v = a4.find(&j).collect::>(); + assert_eq!(v[0], 50); + } + + #[test] + fn parse_demo() -> Result<(), String> { + let p = parse("$.foo['bar'].*[4,-1]")?; + println!("AST: {:?}", &p); + let j = json!({"foo":{"bar":{"baz":[10,20,30,40,50,60]}}}); + + let v = p.find(&j).collect::>(); + println!("RES: {:?}", v); + + assert_eq!(v[0], 50); + assert_eq!(v[1], 60); + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 3238c41..652d8a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,3 +12,6 @@ pub mod jsonpath; mod matchers; mod parser; pub mod path; + +pub mod ast; +pub mod parser_ast; diff --git a/src/parser_ast.rs b/src/parser_ast.rs new file mode 100644 index 0000000..df94e20 --- /dev/null +++ b/src/parser_ast.rs @@ -0,0 +1,131 @@ +/* + * Copyright 2020 VMware, Inc. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +use crate::ast::*; +use crate::pest::Parser; + +#[derive(Parser)] +#[grammar = "grammar.pest"] +struct PathParser; + +pub fn parse(selector: &str) -> Result { + let selector_rule = PathParser::parse(Rule::selector, selector) + .map_err(|e| format!("{}", e))? + .next() + .unwrap(); + + let mut res = Path::Root; + for r in selector_rule.into_inner() { + res = match r.as_rule() { + Rule::rootSelector => res, // TODO: fix grammar so that this is a silent rule since we don't need it + Rule::matcher => Path::Sel(Box::new(res), parse_selector(r)), + _ => panic!("invalid parse tree {:?}", r), + } + } + Ok(res) +} + +fn parse_selector(matcher_rule: pest::iterators::Pair) -> Selector { + let r = matcher_rule.into_inner().next().unwrap(); + + match r.as_rule() { + Rule::wildcardedDotChild => Selector::DotWildcard, + Rule::namedDotChild => Selector::DotName(parse_child_name(r)), + Rule::union => Selector::Union(parse_union_indices(r)), + _ => panic!("invalid parse tree {:?}", r), + } +} + +fn parse_child_name(matcher_rule: pest::iterators::Pair) -> String { + let r = matcher_rule.into_inner().next().unwrap(); + match r.as_rule() { + Rule::childName => r.as_str().to_owned(), + _ => panic!("invalid parse tree {:?}", r), + } +} + +fn parse_union_indices(matcher_rule: pest::iterators::Pair) -> Vec { + let mut res = Vec::new(); + + for r in matcher_rule.into_inner() { + match r.as_rule() { + Rule::unionChild => res.append(&mut parse_union_child(r)), + Rule::unionArrayIndex => res.push(parse_union_array_index(r)), + _ => panic!("invalid parse tree {:?}", r), + } + } + res +} + +fn parse_union_child(matcher_rule: pest::iterators::Pair) -> Vec { + let mut res = Vec::new(); + for r in matcher_rule.into_inner() { + match r.as_rule() { + Rule::doubleInner => res.push(Index::Field(unescape(r.as_str()))), + Rule::singleInner => res.push(Index::Field(unescape_single(r.as_str()))), + _ => panic!("invalid parse tree {:?}", r), + } + } + res +} + +fn parse_union_array_index(matcher_rule: pest::iterators::Pair) -> Index { + let i = matcher_rule.as_str().parse().unwrap(); + Index::Number(i) +} + +fn unescape(contents: &str) -> String { + let s = format!(r#""{}""#, contents); + serde_json::from_str(&s).unwrap() +} + +fn unescape_single(contents: &str) -> String { + let d = to_double_quoted(contents); + unescape(&d) +} + +// converts a single quoted string body into a string that can be unescaped +// by a function that knows how to unescape double quoted string, +// It works by unescaping single quotes and escaping double quotes while leaving +// everything else untouched. +fn to_double_quoted(contents: &str) -> String { + let mut output = String::new(); + let mut escaping = false; + for ch in contents.chars() { + if !escaping { + if ch == '\\' { + escaping = true; + } else { + if ch == '"' { + output.push('\\'); + } + output.push(ch); + } + } else { + escaping = false; + if ch != '\'' { + output.push('\\'); + }; + output.push(ch); + } + } + output +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_to_double() { + assert_eq!(to_double_quoted(r#"ab"#), r#"ab"#); + assert_eq!(to_double_quoted(r#"a"b"#), r#"a\"b"#); + assert_eq!(to_double_quoted(r#"a\'b"#), r#"a'b"#); + assert_eq!(to_double_quoted(r#"a\nb"#), r#"a\nb"#); + assert_eq!(to_double_quoted(r#"a\bb"#), r#"a\bb"#); + assert_eq!(to_double_quoted(r#"a\\b"#), r#"a\\b"#); + } +} From 2ad82413d87d80837d4219223de765080e342552 Mon Sep 17 00:00:00 2001 From: Marko Mikulicic Date: Wed, 7 Oct 2020 11:29:23 +0200 Subject: [PATCH 2/7] Use new AST base parser by default --- src/ast.rs | 7 +- src/jsonpath.rs | 20 +++++- src/lib.rs | 6 +- src/matchers.rs | 176 ---------------------------------------------- src/parser.rs | 112 +++++++++-------------------- src/parser_ast.rs | 131 ---------------------------------- src/path.rs | 35 --------- tests/cts.rs | 2 +- 8 files changed, 56 insertions(+), 433 deletions(-) delete mode 100644 src/matchers.rs delete mode 100644 src/parser_ast.rs delete mode 100644 src/path.rs diff --git a/src/ast.rs b/src/ast.rs index 4b11494..f1594f0 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -104,15 +104,12 @@ fn abs_index(index: i64, node: &Value) -> usize { #[cfg(test)] mod test { use super::*; - use crate::parser_ast::parse; + use crate::parser::parse; use serde_json::json; #[test] fn demo() { - let a1 = Path::Sel( - Box::new(Path::Root), - Selector::DotName("foo".to_owned()), - ); + let a1 = Path::Sel(Box::new(Path::Root), Selector::DotName("foo".to_owned())); let a2 = Path::Sel(Box::new(a1), Selector::DotName("bar".to_owned())); let a3 = Path::Sel( Box::new(a2), diff --git a/src/jsonpath.rs b/src/jsonpath.rs index 72a7ce8..2182a44 100644 --- a/src/jsonpath.rs +++ b/src/jsonpath.rs @@ -4,8 +4,9 @@ * SPDX-License-Identifier: BSD-2-Clause */ +use crate::ast; use crate::parser; -use crate::path::Path; +use serde_json::Value; #[derive(Debug)] pub struct SyntaxError { @@ -18,6 +19,19 @@ impl std::fmt::Display for SyntaxError { } } -pub fn parse(selector: &str) -> Result { - parser::parse(selector).map_err(|m| SyntaxError { message: m }) +pub enum FindError { + // no errors yet +} + +pub fn parse(selector: &str) -> Result { + let p = parser::parse(selector).map_err(|m| SyntaxError { message: m })?; + Ok(Path(p)) +} + +pub struct Path(ast::Path); + +impl Path { + pub fn find<'a>(&'a self, document: &'a Value) -> Result, FindError> { + Ok(self.0.find(document).collect()) + } } diff --git a/src/lib.rs b/src/lib.rs index 652d8a2..44f42a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,10 +8,6 @@ extern crate pest; #[macro_use] extern crate pest_derive; +pub mod ast; pub mod jsonpath; -mod matchers; mod parser; -pub mod path; - -pub mod ast; -pub mod parser_ast; diff --git a/src/matchers.rs b/src/matchers.rs deleted file mode 100644 index 1605656..0000000 --- a/src/matchers.rs +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright 2020 VMware, Inc. - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -use serde_json::Value; -use std::iter; - -/// An iterator over matcher selection results. -type Iter<'a> = Box + 'a>; - -/// Matcher maps a node to a list of nodes. If the input node is not matched by the matcher or -/// the matcher does not select any subnodes of the input node, then the result is empty. -pub trait Matcher { - fn select<'a>(&'a self, node: &'a Value) -> Iter<'a>; -} - -/// Selects exactly one item, namely the node -/// of the subtree the selector is applied to. -/// -/// (which may or may be not the actual root of the document). -pub struct RootSelector {} - -impl Matcher for RootSelector { - fn select<'a>(&self, node: &'a Value) -> Iter<'a> { - Box::new(iter::once(node)) - } -} - -/// Selects all children of a node. -pub struct WildcardedChild {} - -impl Matcher for WildcardedChild { - fn select<'a>(&self, node: &'a Value) -> Iter<'a> { - match node { - Value::Object(m) => Box::new(m.values()), - Value::Array(a) => Box::new(a.iter()), - _ => Box::new(iter::empty()), - } - } -} - -/// Selects a named child. -pub struct Child { - name: String, -} - -impl Child { - pub fn new(name: String) -> Self { - Child { name } - } -} - -impl Matcher for Child { - fn select<'a>(&self, node: &'a Value) -> Iter<'a> { - Box::new(node.get(&self.name).into_iter()) - } -} - -/// Selects an array item by index. -/// -/// If the index is negative, it references element len-abs(index). -pub struct ArrayIndex { - index: i64, -} - -impl ArrayIndex { - pub fn new(index: i64) -> Self { - ArrayIndex { index } - } -} - -impl Matcher for ArrayIndex { - fn select<'a>(&self, node: &'a Value) -> Iter<'a> { - let idx = if self.index >= 0 { - self.index as usize - } else { - let len = if let Value::Array(a) = node { - a.len() as i64 - } else { - 0 - }; - (len + self.index) as usize - }; - Box::new(node.get(idx).into_iter()) - } -} - -/// Applies a sequence of selectors on the same node and returns -/// a concatenation of the results. -pub struct Union { - elements: Vec>, -} - -impl Union { - pub fn new(elements: Vec>) -> Self { - Union { elements } - } -} - -impl Matcher for Union { - fn select<'a>(&'a self, node: &'a Value) -> Iter<'a> { - Box::new(self.elements.iter().flat_map(move |it| it.select(node))) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::{json, Value}; - - #[test] - fn object_wildcard() { - let s = WildcardedChild {}; - let j = json!({"a": 1, "b": 2}); - let r: Vec<&Value> = s.select(&j).collect(); - assert_eq!(format!("{:?}", r), "[Number(1), Number(2)]"); - } - - #[test] - fn array_wildcard() { - let s = WildcardedChild {}; - let j = json!([1, 2]); - let r: Vec<&Value> = s.select(&j).collect(); - assert_eq!(format!("{:?}", r), "[Number(1), Number(2)]"); - } - - #[test] - fn array_index() { - let s = ArrayIndex::new(1); - let j = json!([1, 2]); - let r: Vec<&Value> = s.select(&j).collect(); - assert_eq!(format!("{:?}", r), "[Number(2)]"); - } - - #[test] - fn array_index_zero() { - let s = ArrayIndex::new(0); - let j = json!([1, 2]); - let r: Vec<&Value> = s.select(&j).collect(); - assert_eq!(format!("{:?}", r), "[Number(1)]"); - } - - #[test] - fn array_index_oob() { - let s = ArrayIndex::new(4); - let j = json!([1, 2]); - let r: Vec<&Value> = s.select(&j).collect(); - assert_eq!(r.len(), 0); - } - - #[test] - fn array_index_negative() { - let s = ArrayIndex::new(-1); - let j = json!([1, 2]); - let r: Vec<&Value> = s.select(&j).collect(); - assert_eq!(format!("{:?}", r), "[Number(2)]"); - } - - #[test] - fn array_index_negative_extreme() { - let s = ArrayIndex::new(-2); - let j = json!([1, 2]); - let r: Vec<&Value> = s.select(&j).collect(); - assert_eq!(format!("{:?}", r), "[Number(1)]"); - } - - #[test] - fn array_index_negative_oob() { - let s = ArrayIndex::new(-10); - let j = json!([1, 2]); - let r: Vec<&Value> = s.select(&j).collect(); - assert_eq!(r.len(), 0); - } -} diff --git a/src/parser.rs b/src/parser.rs index 7abaec7..edd3921 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,119 +4,77 @@ * SPDX-License-Identifier: BSD-2-Clause */ -use crate::matchers; -use crate::path; +pub use crate::ast::*; use crate::pest::Parser; #[derive(Parser)] #[grammar = "grammar.pest"] struct PathParser; -pub fn parse(selector: &str) -> Result { +pub fn parse(selector: &str) -> Result { let selector_rule = PathParser::parse(Rule::selector, selector) .map_err(|e| format!("{}", e))? .next() .unwrap(); - let mut ms: Vec> = Vec::new(); + let mut res = Path::Root; for r in selector_rule.into_inner() { - match r.as_rule() { - Rule::rootSelector => ms.push(Box::new(matchers::RootSelector {})), - - Rule::matcher => { - for m in parse_matcher(r) { - ms.push(m) - } - } - - _ => println!("r={:?}", r), + res = match r.as_rule() { + Rule::rootSelector => res, // TODO: fix grammar so that this is a silent rule since we don't need it + Rule::matcher => Path::Sel(Box::new(res), parse_selector(r)), + _ => panic!("invalid parse tree {:?}", r), } } - - Ok(path::new(ms)) + Ok(res) } -fn parse_matcher(matcher_rule: pest::iterators::Pair) -> Vec> { - let mut ms: Vec> = Vec::new(); - for r in matcher_rule.into_inner() { - match r.as_rule() { - Rule::wildcardedDotChild => ms.push(Box::new(matchers::WildcardedChild {})), - - Rule::namedDotChild => { - for m in parse_dot_child_matcher(r) { - ms.push(m) - } - } - - Rule::union => { - for m in parse_union(r) { - ms.push(m) - } - } +fn parse_selector(matcher_rule: pest::iterators::Pair) -> Selector { + let r = matcher_rule.into_inner().next().unwrap(); - _ => (), - } + match r.as_rule() { + Rule::wildcardedDotChild => Selector::DotWildcard, + Rule::namedDotChild => Selector::DotName(parse_child_name(r)), + Rule::union => Selector::Union(parse_union_indices(r)), + _ => panic!("invalid parse tree {:?}", r), } - ms } -fn parse_dot_child_matcher( - matcher_rule: pest::iterators::Pair, -) -> Vec> { - let mut ms: Vec> = Vec::new(); - for r in matcher_rule.into_inner() { - if let Rule::childName = r.as_rule() { - ms.push(Box::new(matchers::Child::new(r.as_str().to_owned()))); - } +fn parse_child_name(matcher_rule: pest::iterators::Pair) -> String { + let r = matcher_rule.into_inner().next().unwrap(); + match r.as_rule() { + Rule::childName => r.as_str().to_owned(), + _ => panic!("invalid parse tree {:?}", r), } - ms } -fn parse_union(matcher_rule: pest::iterators::Pair) -> Vec> { - let mut ms: Vec> = Vec::new(); +fn parse_union_indices(matcher_rule: pest::iterators::Pair) -> Vec { + let mut res = Vec::new(); + for r in matcher_rule.into_inner() { match r.as_rule() { - Rule::unionChild => { - for m in parse_union_child(r) { - ms.push(m) - } - } - Rule::unionArrayIndex => { - for m in parse_union_array_index(r) { - ms.push(m) - } - } - _ => {} + Rule::unionChild => res.append(&mut parse_union_child(r)), + Rule::unionArrayIndex => res.push(parse_union_array_index(r)), + _ => panic!("invalid parse tree {:?}", r), } } - vec![Box::new(matchers::Union::new(ms))] + res } -fn parse_union_child(matcher_rule: pest::iterators::Pair) -> Vec> { - let mut ms: Vec> = Vec::new(); +fn parse_union_child(matcher_rule: pest::iterators::Pair) -> Vec { + let mut res = Vec::new(); for r in matcher_rule.into_inner() { match r.as_rule() { - Rule::doubleInner => { - ms.push(Box::new(matchers::Child::new(unescape(r.as_str())))); - } - - Rule::singleInner => { - ms.push(Box::new(matchers::Child::new(unescape_single(r.as_str())))); - } - - _ => (), + Rule::doubleInner => res.push(Index::Field(unescape(r.as_str()))), + Rule::singleInner => res.push(Index::Field(unescape_single(r.as_str()))), + _ => panic!("invalid parse tree {:?}", r), } } - ms + res } -fn parse_union_array_index( - matcher_rule: pest::iterators::Pair, -) -> Vec> { - let mut ms: Vec> = Vec::new(); +fn parse_union_array_index(matcher_rule: pest::iterators::Pair) -> Index { let i = matcher_rule.as_str().parse().unwrap(); - ms.push(Box::new(matchers::ArrayIndex::new(i))); - ms + Index::Number(i) } fn unescape(contents: &str) -> String { diff --git a/src/parser_ast.rs b/src/parser_ast.rs deleted file mode 100644 index df94e20..0000000 --- a/src/parser_ast.rs +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright 2020 VMware, Inc. - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -use crate::ast::*; -use crate::pest::Parser; - -#[derive(Parser)] -#[grammar = "grammar.pest"] -struct PathParser; - -pub fn parse(selector: &str) -> Result { - let selector_rule = PathParser::parse(Rule::selector, selector) - .map_err(|e| format!("{}", e))? - .next() - .unwrap(); - - let mut res = Path::Root; - for r in selector_rule.into_inner() { - res = match r.as_rule() { - Rule::rootSelector => res, // TODO: fix grammar so that this is a silent rule since we don't need it - Rule::matcher => Path::Sel(Box::new(res), parse_selector(r)), - _ => panic!("invalid parse tree {:?}", r), - } - } - Ok(res) -} - -fn parse_selector(matcher_rule: pest::iterators::Pair) -> Selector { - let r = matcher_rule.into_inner().next().unwrap(); - - match r.as_rule() { - Rule::wildcardedDotChild => Selector::DotWildcard, - Rule::namedDotChild => Selector::DotName(parse_child_name(r)), - Rule::union => Selector::Union(parse_union_indices(r)), - _ => panic!("invalid parse tree {:?}", r), - } -} - -fn parse_child_name(matcher_rule: pest::iterators::Pair) -> String { - let r = matcher_rule.into_inner().next().unwrap(); - match r.as_rule() { - Rule::childName => r.as_str().to_owned(), - _ => panic!("invalid parse tree {:?}", r), - } -} - -fn parse_union_indices(matcher_rule: pest::iterators::Pair) -> Vec { - let mut res = Vec::new(); - - for r in matcher_rule.into_inner() { - match r.as_rule() { - Rule::unionChild => res.append(&mut parse_union_child(r)), - Rule::unionArrayIndex => res.push(parse_union_array_index(r)), - _ => panic!("invalid parse tree {:?}", r), - } - } - res -} - -fn parse_union_child(matcher_rule: pest::iterators::Pair) -> Vec { - let mut res = Vec::new(); - for r in matcher_rule.into_inner() { - match r.as_rule() { - Rule::doubleInner => res.push(Index::Field(unescape(r.as_str()))), - Rule::singleInner => res.push(Index::Field(unescape_single(r.as_str()))), - _ => panic!("invalid parse tree {:?}", r), - } - } - res -} - -fn parse_union_array_index(matcher_rule: pest::iterators::Pair) -> Index { - let i = matcher_rule.as_str().parse().unwrap(); - Index::Number(i) -} - -fn unescape(contents: &str) -> String { - let s = format!(r#""{}""#, contents); - serde_json::from_str(&s).unwrap() -} - -fn unescape_single(contents: &str) -> String { - let d = to_double_quoted(contents); - unescape(&d) -} - -// converts a single quoted string body into a string that can be unescaped -// by a function that knows how to unescape double quoted string, -// It works by unescaping single quotes and escaping double quotes while leaving -// everything else untouched. -fn to_double_quoted(contents: &str) -> String { - let mut output = String::new(); - let mut escaping = false; - for ch in contents.chars() { - if !escaping { - if ch == '\\' { - escaping = true; - } else { - if ch == '"' { - output.push('\\'); - } - output.push(ch); - } - } else { - escaping = false; - if ch != '\'' { - output.push('\\'); - }; - output.push(ch); - } - } - output -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_to_double() { - assert_eq!(to_double_quoted(r#"ab"#), r#"ab"#); - assert_eq!(to_double_quoted(r#"a"b"#), r#"a\"b"#); - assert_eq!(to_double_quoted(r#"a\'b"#), r#"a'b"#); - assert_eq!(to_double_quoted(r#"a\nb"#), r#"a\nb"#); - assert_eq!(to_double_quoted(r#"a\bb"#), r#"a\bb"#); - assert_eq!(to_double_quoted(r#"a\\b"#), r#"a\\b"#); - } -} diff --git a/src/path.rs b/src/path.rs deleted file mode 100644 index 3c4d234..0000000 --- a/src/path.rs +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2020 VMware, Inc. - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -use crate::matchers; -use serde_json::Value; - -pub enum FindError { - // no errors yet -} - -pub trait Path { - fn find<'a>(&'a self, document: &'a Value) -> Result, FindError>; -} - -struct SelectorPath { - matchers: Vec>, -} - -pub fn new(matchers: Vec>) -> impl Path { - SelectorPath { matchers } -} - -impl Path for SelectorPath { - fn find<'a>(&'a self, document: &'a Value) -> Result, FindError> { - // pass nodes, starting with document alone, through each matcher in turn - Ok((&self.matchers) - .iter() - .fold(vec![document], |nodes, matcher| { - nodes.iter().flat_map(|node| matcher.select(node)).collect() - })) - } -} diff --git a/tests/cts.rs b/tests/cts.rs index 166fe55..30b5de9 100644 --- a/tests/cts.rs +++ b/tests/cts.rs @@ -6,7 +6,7 @@ #[cfg(test)] mod tests { - use jsonpath_reference_implementation::{jsonpath, path::Path as _}; + use jsonpath_reference_implementation::jsonpath; use serde::{Deserialize, Serialize}; use std::fs; use std::panic; From 44915665b0c494618da491c3c8963d0da0c2cec9 Mon Sep 17 00:00:00 2001 From: Marko Mikulicic Date: Wed, 7 Oct 2020 11:52:45 +0200 Subject: [PATCH 3/7] Simplify parser --- src/grammar.pest | 4 ++-- src/parser.rs | 26 +++++++++++--------------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/grammar.pest b/src/grammar.pest index 476a6eb..b1ec570 100644 --- a/src/grammar.pest +++ b/src/grammar.pest @@ -1,6 +1,6 @@ -selector = _{ SOI ~ jsonPath ~ EOI } +selector = _{ SOI ~ rootSelector ~ jsonPath ~ EOI } -jsonPath = ${ rootSelector ~ matcher* } +jsonPath = ${ matcher* } rootSelector = { "$" } matcher = { dotChild | union } diff --git a/src/parser.rs b/src/parser.rs index edd3921..688c53a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -14,18 +14,15 @@ struct PathParser; pub fn parse(selector: &str) -> Result { let selector_rule = PathParser::parse(Rule::selector, selector) .map_err(|e| format!("{}", e))? - .next() + .nth(1) .unwrap(); - let mut res = Path::Root; - for r in selector_rule.into_inner() { - res = match r.as_rule() { - Rule::rootSelector => res, // TODO: fix grammar so that this is a silent rule since we don't need it - Rule::matcher => Path::Sel(Box::new(res), parse_selector(r)), + Ok(selector_rule + .into_inner() + .fold(Path::Root, |prev, r| match r.as_rule() { + Rule::matcher => Path::Sel(Box::new(prev), parse_selector(r)), _ => panic!("invalid parse tree {:?}", r), - } - } - Ok(res) + })) } fn parse_selector(matcher_rule: pest::iterators::Pair) -> Selector { @@ -41,6 +38,7 @@ fn parse_selector(matcher_rule: pest::iterators::Pair) -> Selector { fn parse_child_name(matcher_rule: pest::iterators::Pair) -> String { let r = matcher_rule.into_inner().next().unwrap(); + match r.as_rule() { Rule::childName => r.as_str().to_owned(), _ => panic!("invalid parse tree {:?}", r), @@ -61,15 +59,13 @@ fn parse_union_indices(matcher_rule: pest::iterators::Pair) -> Vec } fn parse_union_child(matcher_rule: pest::iterators::Pair) -> Vec { - let mut res = Vec::new(); - for r in matcher_rule.into_inner() { + matcher_rule.into_inner().map(|r| match r.as_rule() { - Rule::doubleInner => res.push(Index::Field(unescape(r.as_str()))), - Rule::singleInner => res.push(Index::Field(unescape_single(r.as_str()))), + Rule::doubleInner => Index::Field(unescape(r.as_str())), + Rule::singleInner => Index::Field(unescape_single(r.as_str())), _ => panic!("invalid parse tree {:?}", r), } - } - res + ).collect() } fn parse_union_array_index(matcher_rule: pest::iterators::Pair) -> Index { From 9503b4a865cd0b5350943ed654f26df856e901b7 Mon Sep 17 00:00:00 2001 From: Marko Mikulicic Date: Wed, 7 Oct 2020 12:13:09 +0200 Subject: [PATCH 4/7] Improve AST diagram --- src/ast.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index f1594f0..23fd1d0 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -11,11 +11,11 @@ use serde_json::Value; /// For example, the JSONPath `$.foo.bar` yields this AST: /// /// ```text -/// * +/// ^ /// / \ -/// * \___ DotName("bar") +/// ^ \___ DotName("bar") /// / \ -/// * \___ DotName("foo") +/// ^ \___ DotName("foo") /// / /// Root ___/ /// ``` @@ -23,13 +23,13 @@ use serde_json::Value; /// A more complicated example: `$.foo[1,2]["bar"]`: /// /// ```text -/// * +/// ^ /// / \ -/// * \___ Union +/// ^ \___ Union /// / \ \ -/// * \___ Union \ +/// ^ \___ Union \ /// / \ [Field("bar")] -/// * \ +/// ^ \ /// / \ [Number(1), Number(2)] /// / \ /// Root ___/ \___ DotName("foo") From eca69d8287572ba0bbd5aee9d7f0d2a2fe2dfc31 Mon Sep 17 00:00:00 2001 From: Marko Mikulicic Date: Wed, 7 Oct 2020 12:19:52 +0200 Subject: [PATCH 5/7] Fix fmt --- src/parser.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 688c53a..34b4995 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -59,13 +59,14 @@ fn parse_union_indices(matcher_rule: pest::iterators::Pair) -> Vec } fn parse_union_child(matcher_rule: pest::iterators::Pair) -> Vec { - matcher_rule.into_inner().map(|r| - match r.as_rule() { + matcher_rule + .into_inner() + .map(|r| match r.as_rule() { Rule::doubleInner => Index::Field(unescape(r.as_str())), Rule::singleInner => Index::Field(unescape_single(r.as_str())), _ => panic!("invalid parse tree {:?}", r), - } - ).collect() + }) + .collect() } fn parse_union_array_index(matcher_rule: pest::iterators::Pair) -> Index { From 644ddc923eb890b62b91a04590873c79af288e73 Mon Sep 17 00:00:00 2001 From: Marko Mikulicic Date: Wed, 7 Oct 2020 13:09:10 +0200 Subject: [PATCH 6/7] Fix diagram --- src/ast.rs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 23fd1d0..7e5692b 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -11,30 +11,30 @@ use serde_json::Value; /// For example, the JSONPath `$.foo.bar` yields this AST: /// /// ```text -/// ^ -/// / \ -/// ^ \___ DotName("bar") -/// / \ -/// ^ \___ DotName("foo") -/// / -/// Root ___/ +/// ^ +/// / \ +/// ^ \___ DotName("bar") +/// / \ +/// Root ___ / \___ DotName("foo") /// ``` /// /// A more complicated example: `$.foo[1,2]["bar"]`: /// /// ```text -/// ^ -/// / \ -/// ^ \___ Union -/// / \ \ -/// ^ \___ Union \ -/// / \ [Field("bar")] -/// ^ \ -/// / \ [Number(1), Number(2)] -/// / \ -/// Root ___/ \___ DotName("foo") +/// ^ +/// / \ +/// ^ \___ Union +/// / \ \ +/// / \___ Union \___ [Field("bar")] +/// / \ +/// ^ \___ [Number(1), Number(2)] +/// / \ +/// Root ___/ \___ DotName("foo") /// ``` /// +/// Selectors are left associative, thus `$.foo[1,2]["bar"]` behaves +/// like (pseudocode) `(($.foo)[1,2])["bar"]`; thus the root of the resulting +/// tree is actually the right-most selector (the last one to be applied). #[derive(Debug)] pub enum Path { Root, From 2ec6147d6dc6aebd784d56b4d470d3e3a8fcd5ef Mon Sep 17 00:00:00 2001 From: Marko Mikulicic Date: Thu, 8 Oct 2020 11:47:25 +0200 Subject: [PATCH 7/7] Fix nomenclature --- src/ast.rs | 28 +++++++++++++++++----------- src/grammar.pest | 4 ++-- src/parser.rs | 12 ++++++------ 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 7e5692b..4e9d9a7 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -25,9 +25,9 @@ use serde_json::Value; /// / \ /// ^ \___ Union /// / \ \ -/// / \___ Union \___ [Field("bar")] +/// / \___ Union \___ [Name("bar")] /// / \ -/// ^ \___ [Number(1), Number(2)] +/// ^ \___ [Index(1), Index(2)] /// / \ /// Root ___/ \___ DotName("foo") /// ``` @@ -35,6 +35,12 @@ use serde_json::Value; /// Selectors are left associative, thus `$.foo[1,2]["bar"]` behaves /// like (pseudocode) `(($.foo)[1,2])["bar"]`; thus the root of the resulting /// tree is actually the right-most selector (the last one to be applied). +/// +/// The Path::Root AST node is called "root" because that's the +/// name of the node in the JSONPath grammar. It represents the source of +/// the json value stream which gets operated upon by Selector nodes. +/// This is why despite being called "root", this node doesn't lie at the root +/// of the AST tree. #[derive(Debug)] pub enum Path { Root, @@ -43,15 +49,15 @@ pub enum Path { #[derive(Debug)] pub enum Selector { - Union(Vec), + Union(Vec), DotName(String), DotWildcard, } #[derive(Debug)] -pub enum Index { - Field(String), - Number(i64), +pub enum UnionElement { + Name(String), + Index(i64), } type Iter<'a> = Box + 'a>; @@ -79,11 +85,11 @@ impl Selector { } } -impl Index { +impl UnionElement { pub fn get<'a>(&self, v: &'a Value) -> Iter<'a> { match self { - Index::Field(name) => Box::new(v.get(name).into_iter()), - Index::Number(num) => Box::new(v.get(abs_index(*num, v)).into_iter()), + UnionElement::Name(name) => Box::new(v.get(name).into_iter()), + UnionElement::Index(num) => Box::new(v.get(abs_index(*num, v)).into_iter()), } } } @@ -113,9 +119,9 @@ mod test { let a2 = Path::Sel(Box::new(a1), Selector::DotName("bar".to_owned())); let a3 = Path::Sel( Box::new(a2), - Selector::Union(vec![Index::Field("baz".to_owned())]), + Selector::Union(vec![UnionElement::Name("baz".to_owned())]), ); - let a4 = Path::Sel(Box::new(a3), Selector::Union(vec![Index::Number(4)])); + let a4 = Path::Sel(Box::new(a3), Selector::Union(vec![UnionElement::Index(4)])); let j = json!({"foo":{"bar":{"baz":[10,20,30,40,50,60]}}}); println!("j: {}", j); diff --git a/src/grammar.pest b/src/grammar.pest index b1ec570..76f46f1 100644 --- a/src/grammar.pest +++ b/src/grammar.pest @@ -1,6 +1,6 @@ -selector = _{ SOI ~ rootSelector ~ jsonPath ~ EOI } +selector = _{ SOI ~ rootSelector ~ matchers ~ EOI } -jsonPath = ${ matcher* } +matchers = ${ matcher* } rootSelector = { "$" } matcher = { dotChild | union } diff --git a/src/parser.rs b/src/parser.rs index 34b4995..a7a5d70 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -45,7 +45,7 @@ fn parse_child_name(matcher_rule: pest::iterators::Pair) -> String { } } -fn parse_union_indices(matcher_rule: pest::iterators::Pair) -> Vec { +fn parse_union_indices(matcher_rule: pest::iterators::Pair) -> Vec { let mut res = Vec::new(); for r in matcher_rule.into_inner() { @@ -58,20 +58,20 @@ fn parse_union_indices(matcher_rule: pest::iterators::Pair) -> Vec res } -fn parse_union_child(matcher_rule: pest::iterators::Pair) -> Vec { +fn parse_union_child(matcher_rule: pest::iterators::Pair) -> Vec { matcher_rule .into_inner() .map(|r| match r.as_rule() { - Rule::doubleInner => Index::Field(unescape(r.as_str())), - Rule::singleInner => Index::Field(unescape_single(r.as_str())), + Rule::doubleInner => UnionElement::Name(unescape(r.as_str())), + Rule::singleInner => UnionElement::Name(unescape_single(r.as_str())), _ => panic!("invalid parse tree {:?}", r), }) .collect() } -fn parse_union_array_index(matcher_rule: pest::iterators::Pair) -> Index { +fn parse_union_array_index(matcher_rule: pest::iterators::Pair) -> UnionElement { let i = matcher_rule.as_str().parse().unwrap(); - Index::Number(i) + UnionElement::Index(i) } fn unescape(contents: &str) -> String {