Skip to content
This repository was archived by the owner on Feb 22, 2024. It is now read-only.

Commit 404c8a7

Browse files
author
Marko Mikulicic
committed
Accept surrogate pairs
1 parent eafdf74 commit 404c8a7

File tree

2 files changed

+53
-26
lines changed

2 files changed

+53
-26
lines changed

src/parser.rs

Lines changed: 43 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ fn parse_union_child(matcher_rule: pest::iterators::Pair<Rule>) -> Vec<Box<dyn m
9393
}
9494

9595
Rule::singleInner => {
96-
ms.push(Box::new(matchers::Child::new(unescape(r.as_str()))));
96+
ms.push(Box::new(matchers::Child::new(unescape_single(r.as_str()))));
9797
}
9898

9999
_ => (),
@@ -102,38 +102,55 @@ fn parse_union_child(matcher_rule: pest::iterators::Pair<Rule>) -> Vec<Box<dyn m
102102
ms
103103
}
104104

105-
const ESCAPED: &str = "\"'\\/bfnrt";
106-
const UNESCAPED: &str = "\"'\\/\u{0008}\u{000C}\u{000A}\u{000D}\u{0009}";
107-
108105
fn unescape(contents: &str) -> String {
106+
let s = format!(r#""{}""#, contents);
107+
serde_json::from_str(&s).unwrap()
108+
}
109+
110+
fn unescape_single(contents: &str) -> String {
111+
let d = to_double_quoted(contents);
112+
unescape(&d)
113+
}
114+
115+
// Converts a single-quoted string body into a string that can be unescaped
116+
// by a function that knows how to unescape double quoted string,
117+
// It works by unescaping single quotes and escaping double quotes while leaving
118+
// everything else untouched.
119+
fn to_double_quoted(contents: &str) -> String {
109120
let mut output = String::new();
110-
let xs: Vec<char> = contents.chars().collect();
111-
let mut i = 0;
112-
while i < xs.len() {
113-
if xs[i] == '\\' {
114-
i += 1;
115-
if xs[i] == 'u' {
116-
i += 1;
117-
118-
// convert xs[i..i+4] to Unicode character and add it to the output
119-
let x = xs[i..i + 4].iter().collect::<String>();
120-
let n = u32::from_str_radix(&x, 16);
121-
let u = std::char::from_u32(n.unwrap());
122-
output.push(u.unwrap());
123-
124-
i += 4;
121+
let mut escaping = false;
122+
for ch in contents.chars() {
123+
if !escaping {
124+
if ch == '\\' {
125+
escaping = true;
125126
} else {
126-
for (j, c) in ESCAPED.chars().enumerate() {
127-
if xs[i] == c {
128-
output.push(UNESCAPED.chars().nth(j).unwrap())
129-
}
127+
if ch == '"' {
128+
output.push('\\');
130129
}
131-
i += 1;
130+
output.push(ch);
132131
}
133132
} else {
134-
output.push(xs[i]);
135-
i += 1;
133+
escaping = false;
134+
if ch != '\'' {
135+
output.push('\\');
136+
};
137+
output.push(ch);
136138
}
137139
}
138140
output
139141
}
142+
143+
#[cfg(test)]
144+
mod test {
145+
use super::*;
146+
147+
#[test]
148+
fn test_to_double() {
149+
assert_eq!(to_double_quoted(r#"ab"#), r#"ab"#);
150+
assert_eq!(to_double_quoted(r#"a"b"#), r#"a\"b"#);
151+
assert_eq!(to_double_quoted(r#"a\'b"#), r#"a'b"#);
152+
assert_eq!(to_double_quoted(r#"a\nb"#), r#"a\nb"#);
153+
assert_eq!(to_double_quoted(r#"a\bb"#), r#"a\bb"#);
154+
assert_eq!(to_double_quoted(r#"a\\b"#), r#"a\\b"#);
155+
}
156+
}

tests/cts.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,16 @@
236236
"selector": "$[\"\\u263A\"]",
237237
"document": {"☺" : "A"},
238238
"result": ["A"]
239+
}, {
240+
"name": "union child, double quotes, surrogate pair 𝄞",
241+
"selector": "$[\"\\uD834\\uDD1E\"]",
242+
"document": {"𝄞" : "A"},
243+
"result": ["A"]
244+
}, {
245+
"name": "union child, double quotes, surrogate pair 😀",
246+
"selector": "$[\"\\uD83D\\uDE00\"]",
247+
"document": {"😀" : "A"},
248+
"result": ["A"]
239249
}, {
240250
"name": "union child, double quotes, invalid escaped ☺",
241251
"selector": "$[\"\\u263a\"]",

0 commit comments

Comments
 (0)