Skip to content

Commit 09cfb24

Browse files
authored
Merge pull request github#4648 from erik-krogh/regexpParse
Approved by asgerf
2 parents 13edc37 + 7f68b07 commit 09cfb24

File tree

6 files changed

+41
-2
lines changed

6 files changed

+41
-2
lines changed

javascript/extractor/src/com/semmle/js/extractor/Main.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ public class Main {
4343
* A version identifier that should be updated every time the extractor changes in such a way that
4444
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
4545
*/
46-
public static final String EXTRACTOR_VERSION = "2020-09-17";
46+
public static final String EXTRACTOR_VERSION = "2020-11-11";
4747

4848
public static final Pattern NEWLINE = Pattern.compile("\n");
4949

javascript/extractor/src/com/semmle/js/parser/RegExpParser.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookahead;
3636
import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
3737
import java.util.ArrayList;
38+
import java.util.Arrays;
3839
import java.util.List;
3940

4041
/** A parser for ECMAScript 2018 regular expressions. */
@@ -496,10 +497,18 @@ private RegExpTerm parseCharacterClass() {
496497
return this.finishTerm(new CharacterClass(loc, elements, inverted));
497498
}
498499

500+
private static final List<String> escapeClasses = Arrays.asList("d", "D", "s", "S", "w", "W");
501+
499502
private RegExpTerm parseCharacterClassElement() {
500503
SourceLocation loc = new SourceLocation(pos());
501504
RegExpTerm atom = this.parseCharacterClassAtom();
502-
if (!this.lookahead("-]") && this.match("-"))
505+
if (this.lookahead("-\\")) {
506+
for (String c : escapeClasses) {
507+
if (this.lookahead("-\\" + c))
508+
return atom;
509+
}
510+
}
511+
if (!this.lookahead("-]") && this.match("-") && !(atom instanceof CharacterClassEscape))
503512
return this.finishTerm(new CharacterClassRange(loc, atom, this.parseCharacterClassAtom()));
504513
return atom;
505514
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
range
2+
| tst.js:1:13:1:17 | [w-z] | tst.js:1:14:1:16 | w-z |
3+
| tst.js:6:13:6:19 | [\\n-\\r] | tst.js:6:14:6:18 | \\n-\\r |
4+
| tst.js:7:13:7:18 | [\\n-z] | tst.js:7:14:7:17 | \\n-z |
5+
escapeClass
6+
| tst.js:2:13:2:16 | [\\w] | tst.js:2:14:2:15 | \\w |
7+
| tst.js:3:13:3:18 | [\\w-z] | tst.js:3:14:3:15 | \\w |
8+
| tst.js:4:13:4:19 | [\\w-\\w] | tst.js:4:14:4:15 | \\w |
9+
| tst.js:4:13:4:19 | [\\w-\\w] | tst.js:4:17:4:18 | \\w |
10+
| tst.js:5:13:5:18 | [z-\\w] | tst.js:5:16:5:17 | \\w |
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import javascript
2+
3+
query predicate range(RegExpCharacterClass cla, RegExpCharacterRange range) {
4+
cla.getAChild() = range
5+
}
6+
7+
query predicate escapeClass(RegExpCharacterClass cla, RegExpCharacterClassEscape escape) {
8+
cla.getAChild() = escape
9+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
var reg1 = /[w-z]/; // normal range w-z, matches: wxyz
2+
var reg2 = /[\w]/; // escape class, same as \w.
3+
var reg3 = /[\w-z]/; // escape class \w and "-" and "z", same as [a-zA-Z0-9_\-z]
4+
var reg4 = /[\w-\w]/; // escape class \w (twice) and the char "-".
5+
var reg5 = /[z-\w]/; // same as reg3
6+
var reg6 = /[\n-\r]/; // from \n (code 10) to \r (code 13).
7+
var reg7 = /[\n-z]/; // from \n (code 10) to z (code 122).

javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.expected

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,11 @@
8181
| regexplib/address.js:95:379:95:755 | [a-zA-Z0-9&#192;&#193;&#194;&#195;&#196;&#197;&#198;&#199;&#200;&#201;&#202;&#203;&#204;&#205;&#206;&#207;&#208;&#209;&#210;&#211;&#212;&#213;&#214;&#216;&#217;&#218;&#219;&#220;&#221;&#223;&#224;&#225;&#226;&#227;&#228;&#229;&#230;&#231;&#232;&#233;&#234;&#235;&#236;&#237;&#238;&#239;&#241;&#242;&#243;&#244;&#245;&#246;&#248;&#249;&#250;&#251;&#252;&#253;&#255;\\.\\,\\-\\/\\' ]+ | it can start matching anywhere after the start of the preceeding '[a-zA-Z0-9&#192;&#193;&#194;&#195;&#196;&#197;&#198;&#199;&#200;&#201;&#202;&#203;&#204;&#205;&#206;&#207;&#208;&#209;&#210;&#211;&#212;&#213;&#214;&#216;&#217;&#218;&#219;&#220;&#221;&#223;&#224;&#225;&#226;&#227;&#228;&#229;&#230;&#231;&#232;&#233;&#234;&#235;&#236;&#237;&#238;&#239;&#241;&#242;&#243;&#244;&#245;&#246;&#248;&#249;&#250;&#251;&#252;&#253;&#255;\\.\\,\\-\\/\\']+' |
8282
| regexplib/email.js:8:16:8:49 | [^ \\t\\(\\)\\<\\>@,;\\:\\\\\\"\\.\\[\\]\\r\\n]+ | it can start matching anywhere |
8383
| regexplib/email.js:12:2:12:4 | \\w+ | it can start matching anywhere |
84+
| regexplib/email.js:15:6:15:13 | [\\w-\\.]* | it can start matching anywhere after the start of the preceeding '\\w+' |
8485
| regexplib/email.js:15:28:15:30 | \\w* | it can start matching anywhere after the start of the preceeding '\\w+' |
8586
| regexplib/email.js:20:3:20:6 | \\w+? | it can start matching anywhere |
8687
| regexplib/email.js:28:2:28:4 | \\w+ | it can start matching anywhere |
88+
| regexplib/email.js:28:5:28:12 | [\\w-\\.]* | it can start matching anywhere after the start of the preceeding '\\w+' |
8789
| regexplib/email.js:28:27:28:29 | \\w* | it can start matching anywhere after the start of the preceeding '\\w+' |
8890
| regexplib/email.js:28:73:28:87 | [0-9a-zA-Z'\\.]+ | it can start matching anywhere |
8991
| regexplib/email.js:28:125:28:139 | [0-9a-zA-Z'\\.]+ | it can start matching anywhere |
@@ -173,9 +175,11 @@
173175
| regexplib/uri.js:34:3:34:9 | [^\\=&]+ | it can start matching anywhere |
174176
| regexplib/uri.js:39:7:39:9 | .*? | it can start matching anywhere after the start of the preceeding '<a' |
175177
| regexplib/uri.js:44:2:44:4 | .*? | it can start matching anywhere |
178+
| regexplib/uri.js:47:31:47:36 | [\\w-]+ | it can start matching anywhere after the start of the preceeding '[\\w-\\s]*' |
176179
| regexplib/uri.js:53:3:53:9 | [^\\=&]+ | it can start matching anywhere |
177180
| regexplib/uri.js:58:2:58:45 | ((http\\:\\/\\/\|https\\:\\/\\/\|ftp\\:\\/\\/)\|(www.))+ | it can start matching anywhere |
178181
| regexplib/uri.js:59:2:59:13 | [a-zA-Z]{3,} | it can start matching anywhere |
182+
| regexplib/uri.js:64:31:64:36 | [\\w-]+ | it can start matching anywhere after the start of the preceeding '[\\w-\\s]*' |
179183
| regexplib/uri.js:73:2:73:4 | .*? | it can start matching anywhere |
180184
| tst.js:14:13:14:18 | (.*,)+ | it can start matching anywhere |
181185
| tst.js:14:14:14:15 | .* | it can start matching anywhere |

0 commit comments

Comments
 (0)