diff --git a/src/grammar.pest b/src/grammar.pest index b656896..30b93a3 100644 --- a/src/grammar.pest +++ b/src/grammar.pest @@ -3,14 +3,35 @@ selector = _{ SOI ~ jsonPath ~ EOI } jsonPath = ${ rootSelector ~ matcher* } rootSelector = { "$" } -matcher = { dotChild } +matcher = { dotChild | union } dotChild = _{ wildcardedDotChild | namedDotChild } wildcardedDotChild = { ".*" } namedDotChild = ${ "." ~ childName } childName = @{ char+ } char = { - !("\"" | "'" | "\\") ~ ANY + !("\"" | "'" | "\\") ~ ANY // char is still TBD in the draft +} + +union = { "[" ~ unionElement ~ ("," ~ unionElement)* ~ "]" } +unionElement = _{ unionChild } +unionChild = { doubleQuotedString | singleQuotedString } + +doubleQuotedString = _{ "\"" ~ doubleInner ~ "\"" } +doubleInner = @{ doubleChar* } +doubleChar = { + !("\"" | "\\" | '\u{00}'..'\u{1F}') ~ ANY + | "\\" ~ ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t") + | "\\" ~ ("u" ~ upperHexDigit{4}) +} +upperHexDigit = _{ ASCII_DIGIT | "A" | "B" | "C" | "D" | "E" | "F" } + +singleQuotedString = _{ "'" ~ singleInner ~ "'" } +singleInner = @{ singleChar* } +singleChar = { + !("'" | "\\" | '\u{00}'..'\u{1F}') ~ ANY + | "\\" ~ ("'" | "\\" | "/" | "b" | "f" | "n" | "r" | "t") + | "\\" ~ ("u" ~ upperHexDigit{4}) } WHITESPACE = _{ " " } \ No newline at end of file diff --git a/src/matchers.rs b/src/matchers.rs index c8de462..7b60c90 100644 --- a/src/matchers.rs +++ b/src/matchers.rs @@ -33,15 +33,15 @@ impl Matcher for WildcardedChild { } } -pub struct DotChild { +pub struct Child { name: String, } -pub fn new_dot_child_matcher(name: String) -> DotChild { - DotChild { name } +pub fn new_child_matcher(name: String) -> Child { + Child { name } } -impl Matcher for DotChild { +impl Matcher for Child { fn select<'a>(&self, node: &'a Value) -> Box + 'a> { if node.is_object() { let mapping = node.as_object().unwrap(); @@ -55,3 +55,25 @@ impl Matcher for DotChild { } } } + +pub struct Union { + elements: Vec>, +} + +pub fn new_union(elements: Vec>) -> 
Union { + Union { elements } +} + +impl Matcher for Union { + fn select<'a, 'b>(&'a self, node: &'b Value) -> Box + 'b> { + // union of matches of the matchers in the union + let mut u = vec![]; + for m in &self.elements { + let m_selection = m.select(node); + for s in m_selection { + u.push(s); + } + } + Box::new(u.into_iter()) + } +} diff --git a/src/parser.rs b/src/parser.rs index 66294b7..aef543c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -22,11 +22,13 @@ pub fn parse<'a>(selector: &'a str) -> Result, String> for r in selector_rule.into_inner() { match r.as_rule() { Rule::rootSelector => ms.push(Box::new(matchers::RootSelector {})), + Rule::matcher => { for m in parse_matcher(r) { ms.push(m) } } + _ => println!("r={:?}", r), } } @@ -39,11 +41,19 @@ fn parse_matcher(matcher_rule: pest::iterators::Pair) -> Vec ms.push(Box::new(matchers::WildcardedChild {})), + Rule::namedDotChild => { for m in parse_dot_child_matcher(r) { ms.push(m) } } + + Rule::union => { + for m in parse_union(r) { + ms.push(m) + } + } + _ => (), } } @@ -56,10 +66,74 @@ fn parse_dot_child_matcher( let mut ms: Vec> = Vec::new(); for r in matcher_rule.into_inner() { if let Rule::childName = r.as_rule() { - ms.push(Box::new(matchers::new_dot_child_matcher( - r.as_str().to_owned(), - ))); + ms.push(Box::new(matchers::new_child_matcher(r.as_str().to_owned()))); } } ms } + +fn parse_union(matcher_rule: pest::iterators::Pair) -> Vec> { + let mut ms: Vec> = Vec::new(); + for r in matcher_rule.into_inner() { + if let Rule::unionChild = r.as_rule() { + for m in parse_union_child(r) { + ms.push(m) + } + } + } + vec![Box::new(matchers::new_union(ms))] +} + +fn parse_union_child(matcher_rule: pest::iterators::Pair) -> Vec> { + let mut ms: Vec> = Vec::new(); + for r in matcher_rule.into_inner() { + match r.as_rule() { + Rule::doubleInner => { + ms.push(Box::new(matchers::new_child_matcher(unescape(r.as_str())))); + } + + Rule::singleInner => { + 
/// Characters that may follow a backslash in a quoted child name (other than `u`).
const ESCAPED: &str = "\"'\\/bfnrt";
/// The character each entry of `ESCAPED` stands for, in the same order.
const UNESCAPED: &str = "\"'\\/\u{0008}\u{000C}\u{000A}\u{000D}\u{0009}";

/// Substituted for `\u` escapes that do not denote a Unicode scalar value.
const REPLACEMENT: char = '\u{FFFD}';

/// Replaces the escape sequences in the contents of a quoted string with the
/// characters they denote.
///
/// * `\"`, `\'`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r` and `\t` map to the
///   corresponding entry of `UNESCAPED`.
/// * `\uXXXX` maps to the code point with that hexadecimal value. A high
///   surrogate immediately followed by an escaped low surrogate
///   (e.g. `\uD83D\uDE00`) is combined into a single character.
/// * A lone surrogate, or a `\u` that is not followed by four hex digits,
///   yields U+FFFD instead of panicking.
/// * A backslash followed by any other character is dropped together with
///   that character; a trailing backslash is dropped.
///
/// This function never panics, whatever `contents` holds.
fn unescape(contents: &str) -> String {
    let mut output = String::with_capacity(contents.len());
    let mut chars = contents.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            output.push(c);
            continue;
        }
        match chars.next() {
            Some('u') => output.push(unescape_unicode(&mut chars)),
            Some(escaped) => {
                let replacement = ESCAPED
                    .chars()
                    .zip(UNESCAPED.chars())
                    .find(|&(e, _)| e == escaped);
                if let Some((_, unescaped)) = replacement {
                    output.push(unescaped);
                }
            }
            // Trailing backslash: there is nothing left to unescape.
            None => {}
        }
    }
    output
}

/// Decodes the part of a `\u` escape that follows the `u`, consuming a
/// trailing `\uXXXX` low surrogate as well when the first unit is a high
/// surrogate.
fn unescape_unicode(chars: &mut std::str::Chars<'_>) -> char {
    let first = match read_hex4(chars) {
        Some(unit) => unit,
        None => return REPLACEMENT,
    };
    if (0xD800..=0xDBFF).contains(&first) {
        // Look ahead on a copy so that nothing is consumed unless a low
        // surrogate really follows.
        let mut lookahead = chars.clone();
        if lookahead.next() == Some('\\') && lookahead.next() == Some('u') {
            if let Some(second) = read_hex4(&mut lookahead) {
                if (0xDC00..=0xDFFF).contains(&second) {
                    *chars = lookahead;
                    let scalar = 0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00);
                    return std::char::from_u32(scalar).unwrap_or(REPLACEMENT);
                }
            }
        }
    }
    // `from_u32` returns `None` for surrogate code points.
    std::char::from_u32(first).unwrap_or(REPLACEMENT)
}

/// Reads exactly four hexadecimal digits from `chars` and returns their value.
/// On failure `chars` is left untouched.
fn read_hex4(chars: &mut std::str::Chars<'_>) -> Option<u32> {
    let mut lookahead = chars.clone();
    let mut value = 0;
    for _ in 0..4 {
        value = value * 16 + lookahead.next()?.to_digit(16)?;
    }
    *chars = lookahead;
    Some(value)
}
"$[\"\u0003\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0004", + "selector": "$[\"\u0004\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0005", + "selector": "$[\"\u0005\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0006", + "selector": "$[\"\u0006\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0007", + "selector": "$[\"\u0007\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0008", + "selector": "$[\"\u0008\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0009", + "selector": "$[\"\u0009\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+000A", + "selector": "$[\"\u000A\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+000B", + "selector": "$[\"\u000B\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+000C", + "selector": "$[\"\u000C\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+000D", + "selector": "$[\"\u000D\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+000E", + "selector": "$[\"\u000E\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+000F", + "selector": "$[\"\u000F\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0010", + "selector": "$[\"\u0010\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0011", + "selector": "$[\"\u0011\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0012", + "selector": "$[\"\u0012\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0013", + "selector": "$[\"\u0013\"]", + "invalid_selector": true + 
}, { + "name": "union child, double quotes, embedded U+0014", + "selector": "$[\"\u0014\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0015", + "selector": "$[\"\u0015\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0016", + "selector": "$[\"\u0016\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0017", + "selector": "$[\"\u0017\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0018", + "selector": "$[\"\u0018\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0019", + "selector": "$[\"\u0019\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+001A", + "selector": "$[\"\u001A\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+001B", + "selector": "$[\"\u001B\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+001C", + "selector": "$[\"\u001C\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+001D", + "selector": "$[\"\u001D\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+001E", + "selector": "$[\"\u001E\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+001F", + "selector": "$[\"\u001F\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded U+0020", + "selector": "$[\"\u0020\"]", + "document": {"\u0020" : "A"}, + "result": ["A"] + }, { + "name": "union child, double quotes, escaped double quote", + "selector": "$[\"\\\"\"]", + "document": {"\"" : "A"}, + "result": ["A"] + }, { + "name": "union child, double quotes, escaped reverse solidus", + "selector": "$[\"\\\\\"]", + "document": {"\\" : "A"}, + "result": ["A"] + }, { + "name": "union child, double quotes, escaped solidus", + "selector": "$[\"\\/\"]", + 
"document": {"/" : "A"}, + "result": ["A"] + }, { + "name": "union child, double quotes, escaped backspace", + "selector": "$[\"\\b\"]", + "document": {"\u0008" : "A"}, + "result": ["A"] + }, { + "name": "union child, double quotes, escaped form feed", + "selector": "$[\"\\f\"]", + "document": {"\u000C" : "A"}, + "result": ["A"] + }, { + "name": "union child, double quotes, escaped line feed", + "selector": "$[\"\\n\"]", + "document": {"\u000A" : "A"}, + "result": ["A"] + }, { + "name": "union child, double quotes, escaped carriage return", + "selector": "$[\"\\r\"]", + "document": {"\u000D" : "A"}, + "result": ["A"] + }, { + "name": "union child, double quotes, escaped tab", + "selector": "$[\"\\t\"]", + "document": {"\u0009" : "A"}, + "result": ["A"] + }, { + "name": "union child, double quotes, escaped ☺", + "selector": "$[\"\\u263A\"]", + "document": {"☺" : "A"}, + "result": ["A"] + }, { + "name": "union child, double quotes, invalid escaped ☺", + "selector": "$[\"\\u263a\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, invalid escaped single quote", + "selector": "$[\"\\'\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, embedded double quote", + "selector": "$[\"\"\"]", + "invalid_selector": true + }, { + "name": "union child, double quotes, incomplete escape", + "selector": "$[\"\\\"]", + "invalid_selector": true + }, { + "name": "union child, single quotes", + "selector": "$['a']", + "document": {"a" : "A", "b" : "B"}, + "result": ["A"] + }, { + "name": "union child absent, single quotes", + "selector": "$['c']", + "document": {"a" : "A", "b" : "B"}, + "result": [] + }, { + "name": "union child of array, single quotes", + "selector": "$['a']", + "document": ["first", "second"], + "result": [] + }, { + "name": "union child, single quotes, embedded U+0000", + "selector": "$['\u0000']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0001", + "selector": "$['\u0001']", 
+ "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0002", + "selector": "$['\u0002']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0003", + "selector": "$['\u0003']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0004", + "selector": "$['\u0004']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0005", + "selector": "$['\u0005']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0006", + "selector": "$['\u0006']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0007", + "selector": "$['\u0007']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0008", + "selector": "$['\u0008']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0009", + "selector": "$['\u0009']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+000A", + "selector": "$['\u000A']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+000B", + "selector": "$['\u000B']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+000C", + "selector": "$['\u000C']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+000D", + "selector": "$['\u000D']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+000E", + "selector": "$['\u000E']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+000F", + "selector": "$['\u000F']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0010", + "selector": "$['\u0010']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0011", + "selector": "$['\u0011']", + "invalid_selector": true + }, { + "name": "union child, single quotes, 
embedded U+0012", + "selector": "$['\u0012']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0013", + "selector": "$['\u0013']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0014", + "selector": "$['\u0014']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0015", + "selector": "$['\u0015']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0016", + "selector": "$['\u0016']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0017", + "selector": "$['\u0017']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0018", + "selector": "$['\u0018']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0019", + "selector": "$['\u0019']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+001A", + "selector": "$['\u001A']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+001B", + "selector": "$['\u001B']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+001C", + "selector": "$['\u001C']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+001D", + "selector": "$['\u001D']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+001E", + "selector": "$['\u001E']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+001F", + "selector": "$['\u001F']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded U+0020", + "selector": "$['\u0020']", + "document": {"\u0020" : "A"}, + "result": ["A"] + }, { + "name": "union child, single quotes, escaped single quote", + "selector": "$['\\'']", + "document": {"'" : "A"}, + "result": ["A"] + }, { + "name": "union child, single quotes, escaped reverse solidus", + 
"selector": "$['\\\\']", + "document": {"\\" : "A"}, + "result": ["A"] + }, { + "name": "union child, single quotes, escaped solidus", + "selector": "$['\\/']", + "document": {"/" : "A"}, + "result": ["A"] + }, { + "name": "union child, single quotes, escaped backspace", + "selector": "$['\\b']", + "document": {"\u0008" : "A"}, + "result": ["A"] + }, { + "name": "union child, single quotes, escaped form feed", + "selector": "$['\\f']", + "document": {"\u000C" : "A"}, + "result": ["A"] + }, { + "name": "union child, single quotes, escaped line feed", + "selector": "$['\\n']", + "document": {"\u000A" : "A"}, + "result": ["A"] + }, { + "name": "union child, single quotes, escaped carriage return", + "selector": "$['\\r']", + "document": {"\u000D" : "A"}, + "result": ["A"] + }, { + "name": "union child, single quotes, escaped tab", + "selector": "$['\\t']", + "document": {"\u0009" : "A"}, + "result": ["A"] + }, { + "name": "union child, single quotes, escaped ☺", + "selector": "$['\\u263A']", + "document": {"☺" : "A"}, + "result": ["A"] + }, { + "name": "union child, single quotes, invalid escaped ☺", + "selector": "$['\\u263a']", + "invalid_selector": true + }, { + "name": "union child, single quotes, invalid escaped double quote", + "selector": "$['\\\"']", + "invalid_selector": true + }, { + "name": "union child, single quotes, embedded single quote", + "selector": "$[''']", + "invalid_selector": true + }, { + "name": "union child, single quotes, incomplete escape", + "selector": "$['\\']", + "invalid_selector": true + } ]} \ No newline at end of file diff --git a/tests/cts.rs b/tests/cts.rs index 9eee8da..65b09af 100644 --- a/tests/cts.rs +++ b/tests/cts.rs @@ -22,8 +22,16 @@ mod tests { struct Testcase { name: String, selector: String, - document: serde_json::Value, - result: serde_json::Value, + + #[serde(default)] + invalid_selector: bool, + + #[serde(default)] + document: serde_json::Value, // omitted if invalid_selector = true + + #[serde(default)] + result: 
serde_json::Value, // omitted if invalid_selector = true + #[serde(default)] focus: bool, // if true, run only tests with focus set to true } @@ -50,14 +58,16 @@ mod tests { ); } let path = jsonpath::parse(&t.selector); - assert!( - path.is_ok(), - "parsing {} failed: {}", - t.selector, - path.err().expect("should be an error") - ); - if let Ok(p) = path { + if let Ok(ref p) = path { + if t.invalid_selector { + assert!( + path.is_err(), + "{}: parsing {} should have failed", + t.name, + t.selector + ); + } if let Ok(result) = p.find(&t.document) { if !equal(&result, as_array(&t.result).expect("invalid result")) { assert!( @@ -71,6 +81,16 @@ mod tests { } else { assert!(false, "find failed") // should not happen } + } else { + if !t.invalid_selector { + assert!( + path.is_ok(), + "{}: parsing {} should have succeeded but failed: {}", + t.name, + t.selector, + path.err().expect("should be an error") + ); + } } }); if let Err(err) = result {