Skip to content
This repository was archived by the owner on Feb 22, 2024. It is now read-only.

Commit f5514c9

Browse files
author
Marko Mikulicic
committed
Play with explicit AST
1 parent 4ad5133 commit f5514c9

File tree

3 files changed

+277
-0
lines changed

3 files changed

+277
-0
lines changed

src/ast.rs

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
/*
2+
* Copyright 2020 VMware, Inc.
3+
*
4+
* SPDX-License-Identifier: BSD-2-Clause
5+
*/
6+
7+
use serde_json::Value;
8+
9+
/// A path is a tree of selector nodes.
///
/// A path is either the document [`Path::Root`] or a [`Path::Sel`] node that
/// applies one [`Selector`] to the results of a parent path. Selectors written
/// left to right in a JSONPath expression therefore nest left-deep: the last
/// selector of the expression is the outermost node.
///
/// For example, the JSONPath `$.foo.bar` yields this AST:
///
/// ```text
/// Sel
/// |-- Sel
/// |   |-- Root
/// |   `-- DotName("foo")
/// `-- DotName("bar")
/// ```
///
/// A more complicated example: `$.foo[1,2]["bar"]`:
///
/// ```text
/// Sel
/// |-- Sel
/// |   |-- Sel
/// |   |   |-- Root
/// |   |   `-- DotName("foo")
/// |   `-- Union([Number(1), Number(2)])
/// `-- Union([Field("bar")])
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Path {
    /// The document root (`$`).
    Root,
    /// The given selector applied to every value matched by the parent path.
    Sel(Box<Path>, Selector),
}

/// A single step of a [`Path`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Selector {
    /// Bracketed list of indices, e.g. `[1,2]` or `["bar"]`.
    Union(Vec<Index>),
    /// Dot-notation child name, e.g. `.foo`.
    DotName(String),
    /// Dot-notation wildcard, `.*`.
    DotWildcard,
}

/// One entry of a [`Selector::Union`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Index {
    /// Member name, looked up by key.
    Field(String),
    /// Array position; negative values count back from the end of the array.
    Number(i64),
}
56+
57+
/// Boxed iterator over JSON values borrowed for `'a`; the return type shared by
/// `Path::find`, `Selector::find` and `Index::get`.
type Iter<'a> = Box<dyn Iterator<Item = &'a Value> + 'a>;
58+
59+
impl Path {
60+
pub fn find<'a>(&'a self, input: &'a Value) -> Iter<'a> {
61+
match self {
62+
Path::Root => Box::new(std::iter::once(input)),
63+
Path::Sel(left, sel) => Box::new(left.find(input).flat_map(move |v| sel.find(v))),
64+
}
65+
}
66+
}
67+
68+
impl Selector {
69+
pub fn find<'a>(&'a self, input: &'a Value) -> Iter<'a> {
70+
match self {
71+
Selector::Union(indices) => Box::new(indices.iter().flat_map(move |i| i.get(input))),
72+
Selector::DotName(name) => Box::new(input.get(name).into_iter()),
73+
Selector::DotWildcard => match input {
74+
Value::Object(m) => Box::new(m.values()),
75+
Value::Array(a) => Box::new(a.iter()),
76+
_ => Box::new(std::iter::empty()),
77+
},
78+
}
79+
}
80+
}
81+
82+
impl Index {
83+
pub fn get<'a>(&self, v: &'a Value) -> Iter<'a> {
84+
match self {
85+
Index::Field(name) => Box::new(v.get(name).into_iter()),
86+
Index::Number(num) => Box::new(v.get(abs_index(*num, v)).into_iter()),
87+
}
88+
}
89+
}
90+
91+
fn abs_index(index: i64, node: &Value) -> usize {
92+
if index >= 0 {
93+
index as usize
94+
} else {
95+
let len = if let Value::Array(a) = node {
96+
a.len() as i64
97+
} else {
98+
0
99+
};
100+
(len + index) as usize
101+
}
102+
}
103+
104+
#[cfg(test)]
mod test {
    use super::*;
    use crate::parser_ast::parse;
    use serde_json::json;

    /// Evaluates a hand-built AST equivalent to `$.foo.bar["baz"][4]`.
    #[test]
    fn demo() {
        let a1 = Path::Sel(
            Box::new(Path::Root),
            Selector::DotName("foo".to_owned()),
        );
        let a2 = Path::Sel(Box::new(a1), Selector::DotName("bar".to_owned()));
        let a3 = Path::Sel(
            Box::new(a2),
            Selector::Union(vec![Index::Field("baz".to_owned())]),
        );
        let a4 = Path::Sel(Box::new(a3), Selector::Union(vec![Index::Number(4)]));

        let j = json!({"foo":{"bar":{"baz":[10,20,30,40,50,60]}}});
        println!("j: {}", j);

        let v = a4.find(&j).collect::<Vec<_>>();
        // Compare the whole result so that missing or spurious matches fail with
        // a readable diff instead of an index-out-of-bounds panic.
        let expected = [json!(50)];
        assert_eq!(v, expected.iter().collect::<Vec<_>>());
    }

    /// Parses a JSONPath expression and evaluates the resulting AST, covering
    /// dot names, quoted fields, wildcards and positive/negative array indices.
    #[test]
    fn parse_demo() -> Result<(), String> {
        let p = parse("$.foo['bar'].*[4,-1]")?;
        println!("AST: {:?}", &p);
        let j = json!({"foo":{"bar":{"baz":[10,20,30,40,50,60]}}});

        let v = p.find(&j).collect::<Vec<_>>();
        println!("RES: {:?}", v);

        let expected = [json!(50), json!(60)];
        assert_eq!(v, expected.iter().collect::<Vec<_>>());
        Ok(())
    }
}

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,6 @@ pub mod jsonpath;
1212
mod matchers;
1313
mod parser;
1414
pub mod path;
15+
16+
pub mod ast;
17+
pub mod parser_ast;

src/parser_ast.rs

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
/*
2+
* Copyright 2020 VMware, Inc.
3+
*
4+
* SPDX-License-Identifier: BSD-2-Clause
5+
*/
6+
7+
use crate::ast::*;
8+
use crate::pest::Parser;
9+
10+
#[derive(Parser)]
11+
#[grammar = "grammar.pest"]
12+
/// Parser type whose implementation is generated by `#[derive(Parser)]` from
/// the grammar file `grammar.pest`.
struct PathParser;
13+
14+
pub fn parse(selector: &str) -> Result<Path, String> {
15+
let selector_rule = PathParser::parse(Rule::selector, selector)
16+
.map_err(|e| format!("{}", e))?
17+
.next()
18+
.unwrap();
19+
20+
let mut res = Path::Root;
21+
for r in selector_rule.into_inner() {
22+
res = match r.as_rule() {
23+
Rule::rootSelector => res, // TODO: fix grammar so that this is a silent rule since we don't need it
24+
Rule::matcher => Path::Sel(Box::new(res), parse_selector(r)),
25+
_ => panic!("invalid parse tree {:?}", r),
26+
}
27+
}
28+
Ok(res)
29+
}
30+
31+
fn parse_selector(matcher_rule: pest::iterators::Pair<Rule>) -> Selector {
32+
let r = matcher_rule.into_inner().next().unwrap();
33+
34+
match r.as_rule() {
35+
Rule::wildcardedDotChild => Selector::DotWildcard,
36+
Rule::namedDotChild => Selector::DotName(parse_child_name(r)),
37+
Rule::union => Selector::Union(parse_union_indices(r)),
38+
_ => panic!("invalid parse tree {:?}", r),
39+
}
40+
}
41+
42+
fn parse_child_name(matcher_rule: pest::iterators::Pair<Rule>) -> String {
43+
let r = matcher_rule.into_inner().next().unwrap();
44+
match r.as_rule() {
45+
Rule::childName => r.as_str().to_owned(),
46+
_ => panic!("invalid parse tree {:?}", r),
47+
}
48+
}
49+
50+
fn parse_union_indices(matcher_rule: pest::iterators::Pair<Rule>) -> Vec<Index> {
51+
let mut res = Vec::new();
52+
53+
for r in matcher_rule.into_inner() {
54+
match r.as_rule() {
55+
Rule::unionChild => res.append(&mut parse_union_child(r)),
56+
Rule::unionArrayIndex => res.push(parse_union_array_index(r)),
57+
_ => panic!("invalid parse tree {:?}", r),
58+
}
59+
}
60+
res
61+
}
62+
63+
fn parse_union_child(matcher_rule: pest::iterators::Pair<Rule>) -> Vec<Index> {
64+
let mut res = Vec::new();
65+
for r in matcher_rule.into_inner() {
66+
match r.as_rule() {
67+
Rule::doubleInner => res.push(Index::Field(unescape(r.as_str()))),
68+
Rule::singleInner => res.push(Index::Field(unescape_single(r.as_str()))),
69+
_ => panic!("invalid parse tree {:?}", r),
70+
}
71+
}
72+
res
73+
}
74+
75+
fn parse_union_array_index(matcher_rule: pest::iterators::Pair<Rule>) -> Index {
76+
let i = matcher_rule.as_str().parse().unwrap();
77+
Index::Number(i)
78+
}
79+
80+
fn unescape(contents: &str) -> String {
81+
let s = format!(r#""{}""#, contents);
82+
serde_json::from_str(&s).unwrap()
83+
}
84+
85+
fn unescape_single(contents: &str) -> String {
86+
let d = to_double_quoted(contents);
87+
unescape(&d)
88+
}
89+
90+
// Converts a single-quoted string body into a string that can be unescaped
// by a function that knows how to unescape double-quoted strings.
// It works by unescaping single quotes and escaping double quotes while leaving
// everything else untouched:
//
//   \'      becomes  '    (the quote no longer needs escaping)
//   "       becomes  \"   (the quote now needs escaping)
//   \<c>    stays    \<c> for any other character <c>
//
// A backslash that is the very last character has nothing to escape and is
// dropped.
fn to_double_quoted(contents: &str) -> String {
    // The result is the same length as the input except where quotes change
    // their escaping, so the input length is a good initial capacity.
    let mut output = String::with_capacity(contents.len());
    // True while the previous character was an unconsumed backslash.
    let mut escaping = false;
    for ch in contents.chars() {
        if escaping {
            escaping = false;
            // Keep the backslash for every escape except an escaped single quote.
            if ch != '\'' {
                output.push('\\');
            }
            output.push(ch);
        } else if ch == '\\' {
            // Hold the backslash back until we know what it escapes.
            escaping = true;
        } else {
            if ch == '"' {
                output.push('\\');
            }
            output.push(ch);
        }
    }
    output
}
117+
118+
#[cfg(test)]
mod test {
    use super::*;

    /// Checks `to_double_quoted` against pairs of (single-quoted body,
    /// expected double-quoted body).
    #[test]
    fn test_to_double() {
        let cases = [
            (r#"ab"#, r#"ab"#),
            (r#"a"b"#, r#"a\"b"#),
            (r#"a\'b"#, r#"a'b"#),
            (r#"a\nb"#, r#"a\nb"#),
            (r#"a\bb"#, r#"a\bb"#),
            (r#"a\\b"#, r#"a\\b"#),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(to_double_quoted(input), *expected);
        }
    }
}

0 commit comments

Comments
 (0)