Skip to content

Commit 89613db

Browse files
committed
JS: add query for incomplete HTML attribute sanitization
1 parent d98e956 commit 89613db

9 files changed

+519
-0
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/**
2+
* @name Incomplete HTML attribute sanitization
3+
* @description Writing incompletely sanitized values to HTML
4+
* attribute strings can lead to a cross-site
5+
* scripting vulnerability.
6+
* @kind path-problem
7+
* @problem.severity warning
8+
* @precision high
9+
* @id js/incomplete-html-attribute-sanitization
10+
* @tags security
11+
* external/cwe/cwe-079
12+
* external/cwe/cwe-116
13+
* external/cwe/cwe-20
14+
*/
15+
16+
import javascript
17+
import DataFlow::PathGraph
18+
import semmle.javascript.security.dataflow.IncompleteHtmlAttributeSanitization::IncompleteHtmlAttributeSanitization
19+
import semmle.javascript.security.IncompleteBlacklistSanitizer
20+
21+
/**
22+
* Gets a pretty string of the dangerous characters for `sink`.
23+
*/
24+
string prettyPrintDangerousCharaters(Sink sink) {
25+
result =
26+
strictconcat(string s |
27+
s = describeCharacters(sink.getADangerousCharacter())
28+
|
29+
s, ", " order by s
30+
).regexpReplaceAll(",(?=[^,]+$)", " or")
31+
}
32+
33+
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
34+
where cfg.hasFlowPath(source, sink)
35+
select sink.getNode(), source, sink,
36+
// this message is slightly sub-optimal as we do not have an easy way
37+
// to get the flow labels that reach the sink, so the message includes
38+
// all of them in a disjunction
39+
"Cross-site scripting vulnerability as the output of $@ may contain " +
40+
prettyPrintDangerousCharaters(sink.getNode()) + " when it reaches this attribute definition.",
41+
source.getNode(), "this final HTML sanitizer step"
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
/**
2+
* Provides classes and predicates for working with incomplete blacklist sanitizers.
3+
*/
4+
5+
import javascript
6+
7+
/**
8+
* An incomplete black-list sanitizer.
9+
*/
10+
abstract class IncompleteBlacklistSanitizer extends DataFlow::Node {
11+
/**
12+
* Gets a relevant character that is not sanitized by this sanitizer.
13+
*/
14+
abstract string getAnUnsanitizedCharacter();
15+
16+
/**
17+
* Gets the kind of sanitization this sanitizer performs.
18+
*/
19+
abstract string getKind();
20+
}
21+
22+
/**
23+
* Describes the characters represented by `rep`.
24+
*/
25+
string describeCharacters(string rep) {
26+
rep = "\"" and result = "quotes"
27+
or
28+
rep = "&" and result = "ampersands"
29+
or
30+
rep = "<" and result = "less-thans"
31+
or
32+
rep = ">" and result = "greater-thans"
33+
}
34+
35+
/**
36+
* A local sequence of calls to `String.prototype.replace`,
37+
* represented by the last call.
38+
*/
39+
class StringReplaceCallSequence extends DataFlow::CallNode {
40+
StringReplaceCallSequence() {
41+
this instanceof StringReplaceCall and
42+
not exists(getAStringReplaceMethodCall(this)) // terminal
43+
}
44+
45+
/**
46+
* Gets a member of this sequence.
47+
*/
48+
StringReplaceCall getAMember() { this = getAStringReplaceMethodCall*(result) }
49+
50+
/** Gets a string that is the replacement of this call. */
51+
string getAReplacementString() {
52+
// this is more restrictive than `StringReplaceCall::replaces/2`, in the name of precision
53+
getAMember().getRawReplacement().getStringValue() = result
54+
}
55+
56+
/** Gets a string that is being replaced by this call. */
57+
string getAReplacedString() { getAMember().getAReplacedString() = result }
58+
}
59+
60+
/**
61+
* A specialized version of `DataFlow::Node::getAMethodCall` that is
62+
* restricted to `StringReplaceCall`-nodes.
63+
*/
64+
private StringReplaceCall getAStringReplaceMethodCall(StringReplaceCall n) {
65+
n.getAMethodCall() = result
66+
}
67+
68+
/**
69+
* Provides predicates and classes for reasoning about HTML sanitization.
70+
*/
71+
module HtmlSanitization {
72+
private predicate fixedGlobalReplacement(StringReplaceCallSequence chain) {
73+
forall(StringReplaceCall member | member = chain.getAMember() |
74+
member.isGlobal() and member.getArgument(0) instanceof DataFlow::RegExpLiteralNode
75+
)
76+
}
77+
78+
/**
79+
* Gets a HTML-relevant character that is replaced by `chain`.
80+
*/
81+
private string getALikelyReplacedCharacter(StringReplaceCallSequence chain) {
82+
result = "\"" and
83+
(
84+
chain.getAReplacedString() = result or
85+
chain.getAReplacementString() = "&quot;" or
86+
chain.getAReplacementString() = "&#34;"
87+
)
88+
or
89+
result = "&" and
90+
(
91+
chain.getAReplacedString() = result or
92+
chain.getAReplacementString() = "&amp;" or
93+
chain.getAReplacementString() = "&#40;"
94+
)
95+
or
96+
result = "<" and
97+
(
98+
chain.getAReplacedString() = result or
99+
chain.getAReplacementString() = "&lt;" or
100+
chain.getAReplacementString() = "&#60;"
101+
)
102+
or
103+
result = ">" and
104+
(
105+
chain.getAReplacedString() = result or
106+
chain.getAReplacementString() = "&gt;" or
107+
chain.getAReplacementString() = "&#62;"
108+
)
109+
}
110+
111+
/**
112+
* An incomplete sanitizer for HTML-relevant characters.
113+
*/
114+
class IncompleteSanitizer extends IncompleteBlacklistSanitizer {
115+
StringReplaceCallSequence chain;
116+
string unsanitized;
117+
118+
IncompleteSanitizer() {
119+
this = chain and
120+
fixedGlobalReplacement(chain) and
121+
not getALikelyReplacedCharacter(chain) = unsanitized and
122+
(
123+
// replaces `<` and `>`
124+
getALikelyReplacedCharacter(chain) = "<" and
125+
getALikelyReplacedCharacter(chain) = ">" and
126+
(
127+
unsanitized = "\""
128+
or
129+
unsanitized = "&"
130+
)
131+
or
132+
// replaces '&' and either `<` or `>`
133+
getALikelyReplacedCharacter(chain) = "&" and
134+
(
135+
getALikelyReplacedCharacter(chain) = ">" and
136+
unsanitized = ">"
137+
or
138+
getALikelyReplacedCharacter(chain) = "<" and
139+
unsanitized = "<"
140+
)
141+
) and
142+
// does not replace special characters that the browser doesn't care for
143+
not chain.getAReplacedString() = ["!", "#", "*", "?", "@", "|", "~"] and
144+
/// only replaces explicit characters: exclude character ranges and negated character classes
145+
not exists(RegExpTerm t | t = chain.getAMember().getRegExp().getRoot().getAChild*() |
146+
t.(RegExpCharacterClass).isInverted() or
147+
t instanceof RegExpCharacterRange
148+
) and
149+
// the replacements are either empty or HTML entities
150+
chain.getAReplacementString().regexpMatch("(?i)(|(&[#a-z0-9]+;))")
151+
}
152+
153+
override string getKind() { result = "HTML" }
154+
155+
override string getAnUnsanitizedCharacter() { result = unsanitized }
156+
}
157+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/**
2+
* Provides a taint tracking configuration for reasoning about
3+
* incomplete HTML sanitization vulnerabilities.
4+
*
5+
* Note, for performance reasons: only import this file if
6+
* `IncompleteHtmlAttributeSanitization::Configuration` is needed, otherwise
7+
* `IncompleteHtmlAttributeSanitizationCustomizations` should be imported instead.
8+
*/
9+
10+
import javascript
11+
12+
module IncompleteHtmlAttributeSanitization {
13+
import IncompleteHtmlAttributeSanitizationCustomizations::IncompleteHtmlAttributeSanitization
14+
15+
private module Label {
16+
class Quote extends DataFlow::FlowLabel {
17+
Quote() { this = "\"" }
18+
}
19+
20+
class Ampersand extends DataFlow::FlowLabel {
21+
Ampersand() { this = "&" }
22+
}
23+
24+
DataFlow::FlowLabel characterToLabel(string c) { c = result }
25+
}
26+
27+
/**
28+
* A taint-tracking configuration for reasoning about incomplete HTML sanitization vulnerabilities.
29+
*/
30+
class Configuration extends TaintTracking::Configuration {
31+
Configuration() { this = "IncompleteHtmlAttributeSanitization" }
32+
33+
override predicate isSource(DataFlow::Node source, DataFlow::FlowLabel label) {
34+
label = Label::characterToLabel(source.(Source).getAnUnsanitizedCharacter())
35+
}
36+
37+
override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel label) {
38+
label = Label::characterToLabel(sink.(Sink).getADangerousCharacter())
39+
}
40+
41+
override predicate isAdditionalFlowStep(
42+
DataFlow::Node src, DataFlow::Node dst, DataFlow::FlowLabel srclabel,
43+
DataFlow::FlowLabel dstlabel
44+
) {
45+
super.isAdditionalFlowStep(src, dst) and srclabel = dstlabel
46+
}
47+
48+
override predicate isLabeledBarrier(DataFlow::Node node, DataFlow::FlowLabel lbl) {
49+
lbl = Label::characterToLabel(node.(StringReplaceCall).getAReplacedString()) or
50+
isSanitizer(node)
51+
}
52+
53+
override predicate isSanitizer(DataFlow::Node n) {
54+
n instanceof Sanitizer or
55+
super.isSanitizer(n)
56+
}
57+
}
58+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/**
2+
* Provides default sources, sinks and sanitizers for reasoning about
3+
* incomplete HTML sanitization vulnerabilities, as well as extension
4+
* points for adding your own.
5+
*/
6+
7+
import javascript
8+
import semmle.javascript.security.dataflow.RemoteFlowSources
9+
import semmle.javascript.security.IncompleteBlacklistSanitizer
10+
11+
module IncompleteHtmlAttributeSanitization {
12+
/**
13+
* A data flow source for incomplete HTML sanitization vulnerabilities.
14+
*/
15+
abstract class Source extends DataFlow::Node {
16+
/**
17+
* Gets a character that may come out of this source.
18+
*/
19+
abstract string getAnUnsanitizedCharacter();
20+
}
21+
22+
/**
23+
* A data flow sink for incomplete HTML sanitization vulnerabilities.
24+
*/
25+
abstract class Sink extends DataFlow::Node {
26+
/**
27+
* Gets a character that is dangerous for this sink.
28+
*/
29+
abstract string getADangerousCharacter();
30+
}
31+
32+
/**
33+
* A sanitizer for incomplete HTML sanitization vulnerabilities.
34+
*/
35+
abstract class Sanitizer extends DataFlow::Node { }
36+
37+
/**
38+
* A source of incompletely sanitized characters, considered as a
39+
* flow source for incomplete HTML sanitization vulnerabilities.
40+
*/
41+
class IncompleteHtmlSanitizerAsSource extends Source, HtmlSanitization::IncompleteSanitizer {
42+
override string getAnUnsanitizedCharacter() {
43+
result = HtmlSanitization::IncompleteSanitizer.super.getAnUnsanitizedCharacter()
44+
}
45+
}
46+
47+
/**
48+
* A concatenation that syntactically looks like a definition of an HTML attribute.
49+
*/
50+
class HtmlAttributeConcatenation extends StringOps::ConcatenationLeaf {
51+
string lhs;
52+
53+
HtmlAttributeConcatenation() {
54+
lhs = this.getPreviousLeaf().getStringValue().regexpCapture("(.*)=\"[^\"]*", 1) and
55+
this.getNextLeaf().getStringValue().regexpMatch(".*\".*")
56+
}
57+
58+
/**
59+
* Holds if the attribute value is interpreted as JavaScript source code.
60+
*/
61+
predicate isInterpretedAsJavaScript() { lhs.regexpMatch("(?i)(.* )?on[a-z]+") }
62+
}
63+
64+
/**
65+
* A concatenation that syntactically looks like a definition of an
66+
* HTML attribute, as a sink for incomplete HTML sanitization
67+
* vulnerabilities.
68+
*/
69+
class HtmlAttributeConcatenationAsSink extends Sink, DataFlow::ValueNode,
70+
HtmlAttributeConcatenation {
71+
override string getADangerousCharacter() {
72+
isInterpretedAsJavaScript() and result = "&"
73+
or
74+
result = "\""
75+
}
76+
}
77+
78+
/**
79+
* An encoder for potentially malicious characters, as a sanitizer
80+
* for incomplete HTML sanitization vulnerabilities.
81+
*/
82+
class EncodingSanitizer extends Sanitizer {
83+
EncodingSanitizer() {
84+
this = DataFlow::globalVarRef(["encodeURIComponent", "encodeURI"]).getACall()
85+
}
86+
}
87+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
| tst.js:206:2:206:24 | s().rep ... ]/g,'') | This HTML sanitizer does not sanitize ampersands |
2+
| tst.js:206:2:206:24 | s().rep ... ]/g,'') | This HTML sanitizer does not sanitize quotes |
3+
| tst.js:207:2:207:26 | s().rep ... /g, '') | This HTML sanitizer does not sanitize quotes |
4+
| tst.js:208:2:208:26 | s().rep ... /g, '') | This HTML sanitizer does not sanitize ampersands |
5+
| tst.js:209:2:209:40 | s().rep ... /g, '') | This HTML sanitizer does not sanitize ampersands |
6+
| tst.js:209:2:209:40 | s().rep ... /g, '') | This HTML sanitizer does not sanitize quotes |
7+
| tst.js:210:2:210:58 | s().rep ... /g, '') | This HTML sanitizer does not sanitize quotes |
8+
| tst.js:211:2:211:58 | s().rep ... /g, '') | This HTML sanitizer does not sanitize quotes |
9+
| tst.js:212:2:212:58 | s().rep ... /g, '') | This HTML sanitizer does not sanitize quotes |
10+
| tst.js:215:6:215:24 | s.replace(/>/g, '') | This HTML sanitizer does not sanitize ampersands |
11+
| tst.js:215:6:215:24 | s.replace(/>/g, '') | This HTML sanitizer does not sanitize quotes |
12+
| tst.js:217:2:217:93 | s().rep ... &#39;') | This HTML sanitizer does not sanitize quotes |
13+
| tst.js:243:9:243:31 | s().rep ... ]/g,'') | This HTML sanitizer does not sanitize ampersands |
14+
| tst.js:243:9:243:31 | s().rep ... ]/g,'') | This HTML sanitizer does not sanitize quotes |
15+
| tst.js:244:9:244:33 | s().rep ... /g, '') | This HTML sanitizer does not sanitize quotes |
16+
| tst.js:245:9:245:33 | s().rep ... /g, '') | This HTML sanitizer does not sanitize ampersands |
17+
| tst.js:249:9:249:33 | s().rep ... ]/g,'') | This HTML sanitizer does not sanitize quotes |
18+
| tst.js:251:9:251:33 | s().rep ... ]/g,'') | This HTML sanitizer does not sanitize quotes |
19+
| tst.js:253:21:253:45 | s().rep ... /g, '') | This HTML sanitizer does not sanitize ampersands |
20+
| tst.js:254:32:254:56 | s().rep ... /g, '') | This HTML sanitizer does not sanitize ampersands |
21+
| tst.js:255:26:255:50 | s().rep ... /g, '') | This HTML sanitizer does not sanitize ampersands |
22+
| tst.js:256:15:256:39 | s().rep ... /g, '') | This HTML sanitizer does not sanitize ampersands |
23+
| tst.js:261:10:261:81 | value.r ... '&gt;') | This HTML sanitizer does not sanitize quotes |
24+
| tst.js:270:61:270:85 | s().rep ... /g, '') | This HTML sanitizer does not sanitize ampersands |
25+
| tst.js:272:28:272:50 | s().rep ... ]/g,'') | This HTML sanitizer does not sanitize ampersands |
26+
| tst.js:272:28:272:50 | s().rep ... ]/g,'') | This HTML sanitizer does not sanitize quotes |
27+
| tst.js:274:12:274:94 | s().val ... g , '') | This HTML sanitizer does not sanitize ampersands |
28+
| tst.js:274:12:274:94 | s().val ... g , '') | This HTML sanitizer does not sanitize quotes |
29+
| tst.js:277:9:277:29 | arr2.re ... "/g,"") | This HTML sanitizer does not sanitize ampersands |
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import javascript
2+
import semmle.javascript.security.IncompleteBlacklistSanitizer
3+
4+
from IncompleteBlacklistSanitizer sanitizer
5+
select sanitizer,
6+
"This " + sanitizer.getKind() + " sanitizer does not sanitize " +
7+
describeCharacters(sanitizer.getAnUnsanitizedCharacter())

0 commit comments

Comments
 (0)