Skip to content
This repository was archived by the owner on Feb 22, 2024. It is now read-only.

Commit c64ec89

Browse files
author
Marko Mikulicic
authored
Merge pull request #18 from mkmik/surrogate
Accept surrogate pairs
2 parents 69d89cc + b0e9f7e commit c64ec89

File tree

2 files changed

+53
-26
lines changed

2 files changed

+53
-26
lines changed

src/parser.rs

Lines changed: 43 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ fn parse_union_child(matcher_rule: pest::iterators::Pair<Rule>) -> Vec<Box<dyn m
101101
}
102102

103103
Rule::singleInner => {
104-
ms.push(Box::new(matchers::Child::new(unescape(r.as_str()))));
104+
ms.push(Box::new(matchers::Child::new(unescape_single(r.as_str()))));
105105
}
106106

107107
_ => (),
@@ -119,38 +119,55 @@ fn parse_union_array_index(
119119
ms
120120
}
121121

122-
const ESCAPED: &str = "\"'\\/bfnrt";
123-
const UNESCAPED: &str = "\"'\\/\u{0008}\u{000C}\u{000A}\u{000D}\u{0009}";
124-
125122
fn unescape(contents: &str) -> String {
123+
let s = format!(r#""{}""#, contents);
124+
serde_json::from_str(&s).unwrap()
125+
}
126+
127+
fn unescape_single(contents: &str) -> String {
128+
let d = to_double_quoted(contents);
129+
unescape(&d)
130+
}
131+
132+
// converts a single quoted string body into a string that can be unescaped
133+
// by a function that knows how to unescape double quoted strings.
134+
// It works by unescaping single quotes and escaping double quotes while leaving
135+
// everything else untouched.
136+
fn to_double_quoted(contents: &str) -> String {
126137
let mut output = String::new();
127-
let xs: Vec<char> = contents.chars().collect();
128-
let mut i = 0;
129-
while i < xs.len() {
130-
if xs[i] == '\\' {
131-
i += 1;
132-
if xs[i] == 'u' {
133-
i += 1;
134-
135-
// convert xs[i..i+4] to Unicode character and add it to the output
136-
let x = xs[i..i + 4].iter().collect::<String>();
137-
let n = u32::from_str_radix(&x, 16);
138-
let u = std::char::from_u32(n.unwrap());
139-
output.push(u.unwrap());
140-
141-
i += 4;
138+
let mut escaping = false;
139+
for ch in contents.chars() {
140+
if !escaping {
141+
if ch == '\\' {
142+
escaping = true;
142143
} else {
143-
for (j, c) in ESCAPED.chars().enumerate() {
144-
if xs[i] == c {
145-
output.push(UNESCAPED.chars().nth(j).unwrap())
146-
}
144+
if ch == '"' {
145+
output.push('\\');
147146
}
148-
i += 1;
147+
output.push(ch);
149148
}
150149
} else {
151-
output.push(xs[i]);
152-
i += 1;
150+
escaping = false;
151+
if ch != '\'' {
152+
output.push('\\');
153+
};
154+
output.push(ch);
153155
}
154156
}
155157
output
156158
}
159+
160+
#[cfg(test)]
161+
mod test {
162+
use super::*;
163+
164+
#[test]
165+
fn test_to_double() {
166+
assert_eq!(to_double_quoted(r#"ab"#), r#"ab"#);
167+
assert_eq!(to_double_quoted(r#"a"b"#), r#"a\"b"#);
168+
assert_eq!(to_double_quoted(r#"a\'b"#), r#"a'b"#);
169+
assert_eq!(to_double_quoted(r#"a\nb"#), r#"a\nb"#);
170+
assert_eq!(to_double_quoted(r#"a\bb"#), r#"a\bb"#);
171+
assert_eq!(to_double_quoted(r#"a\\b"#), r#"a\\b"#);
172+
}
173+
}

tests/cts.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,16 @@
236236
"selector": "$[\"\\u263A\"]",
237237
"document": {"☺" : "A"},
238238
"result": ["A"]
239+
}, {
240+
"name": "union child, double quotes, surrogate pair 𝄞",
241+
"selector": "$[\"\\uD834\\uDD1E\"]",
242+
"document": {"𝄞" : "A"},
243+
"result": ["A"]
244+
}, {
245+
"name": "union child, double quotes, surrogate pair 😀",
246+
"selector": "$[\"\\uD83D\\uDE00\"]",
247+
"document": {"😀" : "A"},
248+
"result": ["A"]
239249
}, {
240250
"name": "union child, double quotes, invalid escaped ☺",
241251
"selector": "$[\"\\u263a\"]",

0 commit comments

Comments
 (0)