Skip to content
This repository was archived by the owner on Feb 22, 2024. It is now read-only.

Commit df1f6c6

Browse files
author
Marko Mikulicic
committed
Play with explicit AST
1 parent 4ad5133 commit df1f6c6

File tree

3 files changed

+282
-0
lines changed

3 files changed

+282
-0
lines changed

src/ast.rs

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/*
2+
* Copyright 2020 VMware, Inc.
3+
*
4+
* SPDX-License-Identifier: BSD-2-Clause
5+
*/
6+
7+
use serde_json::Value;
8+
9+
/// A path is a tree of selector nodes.
10+
///
11+
/// For example, the JSONPath `$.foo.bar` yields this AST:
12+
///
13+
/// ```text
14+
/// *
15+
/// / \
16+
/// * \___ Dot
17+
/// / \ \
18+
/// * \___ Dot \
19+
/// / \ Field("bar")
20+
/// Root ___/ \
21+
/// Field("foo")
22+
/// ```
23+
///
24+
///
25+
///
26+
/// A more complicated example: `$.foo[1,2]["bar"]`:
27+
///
28+
/// ```text
29+
/// *
30+
/// / \
31+
/// * \___ Union
32+
/// / \ \
33+
/// * \___ Union \
34+
/// / \ [Field("bar")]
35+
/// * \
36+
/// / \ [Number(1), Number(2)]
37+
/// Root ___/ \___ Dot
38+
/// \
39+
/// \
40+
/// Field("foo")
41+
/// ```
42+
///
43+
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Path {
    /// The start of every path; evaluating it yields the input document itself.
    Root,
    /// A parent path followed by one selector: the selector is applied to
    /// every value produced by the parent path.
    Sel(Box<Path>, Selector),
}

/// One step of a path, applied to a single JSON value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Selector {
    /// Looks up exactly one index in the value.
    Dot(Index),
    /// Looks up each listed index, in order, in the same value.
    Union(Vec<Index>),
    /// Selects every member value of an object or every element of an array;
    /// selects nothing for any other kind of value.
    Wildcard,
}

/// A single lookup key inside a JSON value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Index {
    /// A member looked up by name.
    Field(String),
    /// An element looked up by position; negative positions count back from
    /// the end of an array.
    Number(i64),
}
61+
62+
/// Boxed iterator over borrowed JSON values; the common return type of every
/// `find`/`get` method in this module, so differently shaped iterators can be
/// returned from the same function.
type Iter<'a> = Box<dyn Iterator<Item = &'a Value> + 'a>;
63+
64+
impl Path {
65+
pub fn find<'a>(&'a self, input: &'a Value) -> Iter<'a> {
66+
match self {
67+
Path::Root => Box::new(std::iter::once(input)),
68+
Path::Sel(left, sel) => Box::new(left.find(input).flat_map(move |v| sel.find(v))),
69+
}
70+
}
71+
}
72+
73+
impl Selector {
74+
pub fn find<'a>(&'a self, input: &'a Value) -> Iter<'a> {
75+
match self {
76+
Selector::Dot(index) => Box::new(index.get(input)),
77+
Selector::Union(indices) => Box::new(indices.iter().flat_map(move |i| i.get(input))),
78+
Selector::Wildcard => match input {
79+
Value::Object(m) => Box::new(m.values()),
80+
Value::Array(a) => Box::new(a.iter()),
81+
_ => Box::new(std::iter::empty()),
82+
},
83+
}
84+
}
85+
}
86+
87+
impl Index {
88+
pub fn get<'a>(&self, v: &'a Value) -> Iter<'a> {
89+
match self {
90+
Index::Field(name) => Box::new(v.get(name).into_iter()),
91+
Index::Number(num) => Box::new(v.get(abs_index(*num, v)).into_iter()),
92+
}
93+
}
94+
}
95+
96+
fn abs_index(index: i64, node: &Value) -> usize {
97+
if index >= 0 {
98+
index as usize
99+
} else {
100+
let len = if let Value::Array(a) = node {
101+
a.len() as i64
102+
} else {
103+
0
104+
};
105+
(len + index) as usize
106+
}
107+
}
108+
109+
#[cfg(test)]
mod test {
    use super::*;
    use crate::parser_ast::parse;
    use serde_json::json;

    /// Builds the AST for `$.foo.bar["baz"][4]` by hand and evaluates it.
    #[test]
    fn demo() {
        let selectors = vec![
            Selector::Dot(Index::Field("foo".to_owned())),
            Selector::Dot(Index::Field("bar".to_owned())),
            Selector::Union(vec![Index::Field("baz".to_owned())]),
            Selector::Union(vec![Index::Number(4)]),
        ];
        // Each selector wraps the path built so far, innermost first.
        let path = selectors
            .into_iter()
            .fold(Path::Root, |left, sel| Path::Sel(Box::new(left), sel));

        let j = json!({"foo":{"bar":{"baz":[10,20,30,40,50,60]}}});
        println!("j: {}", j);

        let v: Vec<_> = path.find(&j).collect();
        assert_eq!(v[0], 50);
    }

    /// Parses a path from text and evaluates it, including a negative index.
    #[test]
    fn parse_demo() -> Result<(), String> {
        let p = parse("$.foo.bar.*[4,-1]")?;
        println!("AST: {:?}", &p);

        let j = json!({"foo":{"bar":{"baz":[10,20,30,40,50,60]}}});
        let v: Vec<_> = p.find(&j).collect();
        println!("RES: {:?}", v);

        assert_eq!(v[0], 50);
        assert_eq!(v[1], 60);
        Ok(())
    }
}

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,6 @@ pub mod jsonpath;
1212
mod matchers;
1313
mod parser;
1414
pub mod path;
15+
16+
pub mod ast;
17+
pub mod parser_ast;

src/parser_ast.rs

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
/*
2+
* Copyright 2020 VMware, Inc.
3+
*
4+
* SPDX-License-Identifier: BSD-2-Clause
5+
*/
6+
7+
use crate::ast::*;
8+
use crate::pest::Parser;
9+
10+
/// Parser derived by pest from the grammar file `grammar.pest`; it is the
/// entry point used by `parse` below to turn path text into a parse tree.
#[derive(Parser)]
#[grammar = "grammar.pest"]
struct PathParser;
13+
14+
pub fn parse(selector: &str) -> Result<Path, String> {
15+
let selector_rule = PathParser::parse(Rule::selector, selector)
16+
.map_err(|e| format!("{}", e))?
17+
.next()
18+
.unwrap();
19+
20+
let mut res = Path::Root;
21+
for r in selector_rule.into_inner() {
22+
res = match r.as_rule() {
23+
Rule::rootSelector => res, // TODO: fix grammar so that this is a silent rule since we don't need it
24+
Rule::matcher => Path::Sel(Box::new(res), parse_selector(r)),
25+
_ => panic!("invalid parse tree {:?}", r),
26+
}
27+
}
28+
Ok(res)
29+
}
30+
31+
fn parse_selector(matcher_rule: pest::iterators::Pair<Rule>) -> Selector {
32+
let r = matcher_rule.into_inner().next().unwrap();
33+
34+
match r.as_rule() {
35+
Rule::wildcardedDotChild => Selector::Wildcard,
36+
Rule::namedDotChild => Selector::Dot(parse_child_name(r)),
37+
Rule::union => Selector::Union(parse_union_indices(r)),
38+
_ => panic!("invalid parse tree {:?}", r),
39+
}
40+
}
41+
42+
fn parse_child_name(matcher_rule: pest::iterators::Pair<Rule>) -> Index {
43+
let r = matcher_rule.into_inner().next().unwrap();
44+
match r.as_rule() {
45+
Rule::childName => Index::Field(r.as_str().to_owned()),
46+
_ => panic!("invalid parse tree {:?}", r),
47+
}
48+
}
49+
50+
fn parse_union_indices(matcher_rule: pest::iterators::Pair<Rule>) -> Vec<Index> {
51+
let mut res = Vec::new();
52+
53+
for r in matcher_rule.into_inner() {
54+
match r.as_rule() {
55+
Rule::unionChild => res.append(&mut parse_union_child(r)),
56+
Rule::unionArrayIndex => res.push(parse_union_array_index(r)),
57+
_ => panic!("invalid parse tree {:?}", r),
58+
}
59+
}
60+
res
61+
}
62+
63+
fn parse_union_child(matcher_rule: pest::iterators::Pair<Rule>) -> Vec<Index> {
64+
let mut res = Vec::new();
65+
for r in matcher_rule.into_inner() {
66+
match r.as_rule() {
67+
Rule::doubleInner => res.push(Index::Field(unescape(r.as_str()))),
68+
Rule::singleInner => res.push(Index::Field(unescape_single(r.as_str()))),
69+
_ => panic!("invalid parse tree {:?}", r),
70+
}
71+
}
72+
res
73+
}
74+
75+
fn parse_union_array_index(matcher_rule: pest::iterators::Pair<Rule>) -> Index {
76+
let i = matcher_rule.as_str().parse().unwrap();
77+
Index::Number(i)
78+
}
79+
80+
fn unescape(contents: &str) -> String {
81+
let s = format!(r#""{}""#, contents);
82+
serde_json::from_str(&s).unwrap()
83+
}
84+
85+
fn unescape_single(contents: &str) -> String {
86+
let d = to_double_quoted(contents);
87+
unescape(&d)
88+
}
89+
90+
/// Converts the body of a single-quoted string into a body that can be
/// unescaped by a function that only understands double-quoted strings.
///
/// It works by unescaping single quotes (`\'` becomes `'`) and escaping bare
/// double quotes (`"` becomes `\"`) while leaving every other character and
/// every other escape sequence untouched. A lone backslash at the very end of
/// the input is dropped.
fn to_double_quoted(contents: &str) -> String {
    let mut output = String::new();
    // True when the previous character was a backslash that is still waiting
    // for the character it escapes.
    let mut pending_backslash = false;

    for ch in contents.chars() {
        match (pending_backslash, ch) {
            (false, '\\') => pending_backslash = true,
            (false, '"') => output.push_str("\\\""),
            (false, plain) => output.push(plain),
            (true, '\'') => {
                pending_backslash = false;
                output.push('\'');
            }
            (true, escaped) => {
                pending_backslash = false;
                output.push('\\');
                output.push(escaped);
            }
        }
    }
    output
}
117+
118+
#[cfg(test)]
mod test {
    use super::*;

    /// Checks the single-quoted to double-quoted body conversion on a table
    /// of (input, expected output) pairs.
    #[test]
    fn test_to_double() {
        let cases = [
            (r#"ab"#, r#"ab"#),
            (r#"a"b"#, r#"a\"b"#),
            (r#"a\'b"#, r#"a'b"#),
            (r#"a\nb"#, r#"a\nb"#),
            (r#"a\bb"#, r#"a\bb"#),
            (r#"a\\b"#, r#"a\\b"#),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(to_double_quoted(input), *expected);
        }
    }
}

0 commit comments

Comments
 (0)