Skip to content

Commit 9d6c07c

Browse files
committed
Python: Add copy of old queries
1 parent b620b9b commit 9d6c07c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1373
-0
lines changed
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
Accessing files using paths constructed from user-controlled data can allow an attacker to access
9+
unexpected resources. This can result in sensitive information being revealed or deleted, or an
10+
attacker being able to influence behavior by modifying unexpected files.
11+
</p>
12+
</overview>
13+
14+
<recommendation>
15+
<p>
16+
Validate user input before using it to construct a file path, either using an off-the-shelf library function
17+
like <code>werkzeug.utils.secure_filename</code>, or by performing custom validation.
18+
</p>
19+
20+
<p>
21+
Ideally, follow these rules:
22+
</p>
23+
24+
<ul>
25+
<li>Do not allow more than a single "." character.</li>
26+
<li>Do not allow directory separators such as "/" or "\" (depending on the file system).</li>
27+
<li>Do not rely on simply replacing problematic sequences such as "../". For example, after
28+
applying this filter to ".../...//", the resulting string would still be "../".</li>
29+
<li>Use an allowlist of known good patterns.</li>
30+
</ul>
31+
</recommendation>
32+
33+
<example>
34+
<p>
35+
In the first example, a file name is read from an HTTP request and then used to access a file.
36+
However, a malicious user could enter a file name that is an absolute path, such as
37+
<code>"/etc/passwd"</code>.
38+
</p>
39+
40+
<p>
41+
In the second example, it appears that the user is restricted to opening a file within the
42+
<code>"user"</code> home directory. However, a malicious user could enter a file name containing
43+
special characters. For example, the string <code>"../../../etc/passwd"</code> will result in the code
44+
reading the file located at <code>"/server/static/images/../../../etc/passwd"</code>, which is the system's
45+
password file. This file would then be sent back to the user, giving them access to all the
46+
system's passwords.
47+
</p>
48+
49+
<p>
50+
In the third example, the path used to access the file system is normalized <em>before</em> being checked against a
51+
known prefix. This ensures that regardless of the user input, the resulting path is safe.
52+
</p>
53+
54+
<sample src="examples/tainted_path.py" />
55+
</example>
56+
57+
<references>
58+
<li>OWASP: <a href="https://owasp.org/www-community/attacks/Path_Traversal">Path Traversal</a>.</li>
59+
<li>npm: <a href="http://werkzeug.pocoo.org/docs/utils/#werkzeug.utils.secure_filename">werkzeug.utils.secure_filename</a>.</li>
60+
</references>
61+
</qhelp>
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* @name Uncontrolled data used in path expression
3+
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
4+
* @kind path-problem
5+
* @problem.severity error
6+
* @sub-severity high
7+
* @precision high
8+
* @id py/path-injection
9+
* @tags correctness
10+
* security
11+
* external/owasp/owasp-a1
12+
* external/cwe/cwe-022
13+
* external/cwe/cwe-023
14+
* external/cwe/cwe-036
15+
* external/cwe/cwe-073
16+
* external/cwe/cwe-099
17+
*/
18+
19+
import python
20+
import semmle.python.security.Paths
21+
/* Sources */
22+
import semmle.python.web.HttpRequest
23+
/* Sinks */
24+
import semmle.python.security.injection.Path
25+
26+
class PathInjectionConfiguration extends TaintTracking::Configuration {
27+
PathInjectionConfiguration() { this = "Path injection configuration" }
28+
29+
override predicate isSource(TaintTracking::Source source) {
30+
source instanceof HttpRequestTaintSource
31+
}
32+
33+
override predicate isSink(TaintTracking::Sink sink) { sink instanceof OpenNode }
34+
35+
override predicate isSanitizer(Sanitizer sanitizer) {
36+
sanitizer instanceof PathSanitizer or
37+
sanitizer instanceof NormalizedPathSanitizer
38+
}
39+
40+
override predicate isExtension(TaintTracking::Extension extension) {
41+
extension instanceof AbsPath
42+
}
43+
}
44+
45+
from PathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
46+
where config.hasFlowPath(src, sink)
47+
select sink.getSink(), src, sink, "This path depends on $@.", src.getSource(),
48+
"a user-provided value"
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>Extracting files from a malicious tar archive without validating that the destination file path
8+
is within the destination directory can cause files outside the destination directory to be
9+
overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
10+
archive paths.</p>
11+
12+
<p>Tar archives contain archive entries representing each file in the archive. These entries
13+
include a file path for the entry, but these file paths are not restricted and may contain
14+
unexpected special elements such as the directory traversal element (<code>..</code>). If these
15+
file paths are used to determine an output file to write the contents of the archive item to, then
16+
the file may be written to an unexpected ___location. This can result in sensitive information being
17+
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
18+
files.</p>
19+
20+
<p>For example, if a tar archive contains a file entry <code>..\sneaky-file</code>, and the tar archive
21+
is extracted to the directory <code>c:\output</code>, then naively combining the paths would result
22+
in an output file path of <code>c:\output\..\sneaky-file</code>, which would cause the file to be
23+
written to <code>c:\sneaky-file</code>.</p>
24+
25+
</overview>
26+
<recommendation>
27+
28+
<p>Ensure that output paths constructed from tar archive entries are validated
29+
to prevent writing files to unexpected locations.</p>
30+
31+
<p>The recommended way of writing an output file from a tar archive entry is to check that
32+
<code>".."</code> does not occur in the path.
33+
</p>
34+
35+
</recommendation>
36+
37+
<example>
38+
<p>
39+
In this example an archive is extracted without validating file paths.
40+
If <code>archive.tar</code> contained relative paths (for
41+
instance, if it were created by something like <code>tar -cf archive.tar
42+
../file.txt</code>) then executing this code could write to locations
43+
outside the destination directory.
44+
</p>
45+
46+
<sample src="examples/tarslip_bad.py" />
47+
48+
<p>To fix this vulnerability, we need to check that the path does not
49+
contain any <code>".."</code> elements in it.
50+
</p>
51+
52+
<sample src="examples/tarslip_good.py" />
53+
54+
</example>
55+
<references>
56+
57+
<li>
58+
Snyk:
59+
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
60+
</li>
61+
<li>
62+
OWASP:
63+
<a href="https://owasp.org/www-community/attacks/Path_Traversal">Path Traversal</a>.
64+
</li>
65+
<li>
66+
Python Library Reference:
67+
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extract">TarFile.extract</a>.
68+
</li>
69+
<li>
70+
Python Library Reference:
71+
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall">TarFile.extractall</a>.
72+
</li>
73+
74+
</references>
75+
</qhelp>
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
/**
2+
* @name Arbitrary file write during tarfile extraction
3+
* @description Extracting files from a malicious tar archive without validating that the
4+
* destination file path is within the destination directory can cause files outside
5+
* the destination directory to be overwritten.
6+
* @kind path-problem
7+
* @id py/tarslip
8+
* @problem.severity error
9+
* @precision medium
10+
* @tags security
11+
* external/cwe/cwe-022
12+
*/
13+
14+
import python
15+
import semmle.python.security.Paths
16+
import semmle.python.dataflow.TaintTracking
17+
import semmle.python.security.strings.Basic
18+
19+
/** A TaintKind to represent open tarfile objects. That is, the result of calling `tarfile.open(...)` */
20+
class OpenTarFile extends TaintKind {
21+
OpenTarFile() { this = "tarfile.open" }
22+
23+
override TaintKind getTaintOfMethodResult(string name) {
24+
name = "getmember" and result instanceof TarFileInfo
25+
or
26+
name = "getmembers" and result.(SequenceKind).getItem() instanceof TarFileInfo
27+
}
28+
29+
override ClassValue getType() { result = Value::named("tarfile.TarFile") }
30+
31+
override TaintKind getTaintForIteration() { result instanceof TarFileInfo }
32+
}
33+
34+
/** The source of open tarfile objects. That is, any call to `tarfile.open(...)` */
35+
class TarfileOpen extends TaintSource {
36+
TarfileOpen() {
37+
Value::named("tarfile.open").getACall() = this and
38+
/*
39+
* If argument refers to a string object, then it's a hardcoded path and
40+
* this tarfile is safe.
41+
*/
42+
43+
not this.(CallNode).getAnArg().pointsTo(any(StringValue str)) and
44+
/* Ignore opens within the tarfile module itself */
45+
not this.(ControlFlowNode).getLocation().getFile().getBaseName() = "tarfile.py"
46+
}
47+
48+
override predicate isSourceOf(TaintKind kind) { kind instanceof OpenTarFile }
49+
}
50+
51+
class TarFileInfo extends TaintKind {
52+
TarFileInfo() { this = "tarfile.entry" }
53+
54+
override TaintKind getTaintOfMethodResult(string name) { name = "next" and result = this }
55+
56+
override TaintKind getTaintOfAttribute(string name) {
57+
name = "name" and result instanceof TarFileInfo
58+
}
59+
}
60+
61+
/*
62+
* For efficiency we don't want to track the flow of taint
63+
* around the tarfile module.
64+
*/
65+
66+
class ExcludeTarFilePy extends Sanitizer {
67+
ExcludeTarFilePy() { this = "Tar sanitizer" }
68+
69+
override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) {
70+
node.getLocation().getFile().getBaseName() = "tarfile.py" and
71+
(
72+
taint instanceof OpenTarFile
73+
or
74+
taint instanceof TarFileInfo
75+
or
76+
taint.(SequenceKind).getItem() instanceof TarFileInfo
77+
)
78+
}
79+
}
80+
81+
/* Any call to an extractall method */
82+
class ExtractAllSink extends TaintSink {
83+
CallNode call;
84+
85+
ExtractAllSink() {
86+
this = call.getFunction().(AttrNode).getObject("extractall") and
87+
count(call.getAnArg()) = 0
88+
}
89+
90+
override predicate sinks(TaintKind kind) { kind instanceof OpenTarFile }
91+
}
92+
93+
/* Argument to extract method */
94+
class ExtractSink extends TaintSink {
95+
CallNode call;
96+
97+
ExtractSink() {
98+
call.getFunction().(AttrNode).getName() = "extract" and
99+
this = call.getArg(0)
100+
}
101+
102+
override predicate sinks(TaintKind kind) { kind instanceof TarFileInfo }
103+
}
104+
105+
/* Members argument to extract method */
106+
class ExtractMembersSink extends TaintSink {
107+
CallNode call;
108+
109+
ExtractMembersSink() {
110+
call.getFunction().(AttrNode).getName() = "extractall" and
111+
(this = call.getArg(0) or this = call.getArgByName("members"))
112+
}
113+
114+
override predicate sinks(TaintKind kind) {
115+
kind.(SequenceKind).getItem() instanceof TarFileInfo
116+
or
117+
kind instanceof OpenTarFile
118+
}
119+
}
120+
121+
class TarFileInfoSanitizer extends Sanitizer {
122+
TarFileInfoSanitizer() { this = "TarInfo sanitizer" }
123+
124+
/** The test `if <path_sanitizing_test>:` clears taint on its `false` edge. */
125+
override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
126+
taint instanceof TarFileInfo and
127+
clears_taint_on_false_edge(test.getTest(), test.getSense())
128+
}
129+
130+
private predicate clears_taint_on_false_edge(ControlFlowNode test, boolean sense) {
131+
path_sanitizing_test(test) and
132+
sense = false
133+
or
134+
// handle `not` (also nested)
135+
test.(UnaryExprNode).getNode().getOp() instanceof Not and
136+
clears_taint_on_false_edge(test.(UnaryExprNode).getOperand(), sense.booleanNot())
137+
}
138+
}
139+
140+
private predicate path_sanitizing_test(ControlFlowNode test) {
141+
/* Assume that any test with "path" in it is a sanitizer */
142+
test.getAChild+().(AttrNode).getName().matches("%path")
143+
or
144+
test.getAChild+().(NameNode).getId().matches("%path")
145+
}
146+
147+
class TarSlipConfiguration extends TaintTracking::Configuration {
148+
TarSlipConfiguration() { this = "TarSlip configuration" }
149+
150+
override predicate isSource(TaintTracking::Source source) { source instanceof TarfileOpen }
151+
152+
override predicate isSink(TaintTracking::Sink sink) {
153+
sink instanceof ExtractSink or
154+
sink instanceof ExtractAllSink or
155+
sink instanceof ExtractMembersSink
156+
}
157+
158+
override predicate isSanitizer(Sanitizer sanitizer) {
159+
sanitizer instanceof TarFileInfoSanitizer
160+
or
161+
sanitizer instanceof ExcludeTarFilePy
162+
}
163+
164+
override predicate isBarrier(DataFlow::Node node) {
165+
// Avoid flow into the tarfile module
166+
exists(ParameterDefinition def |
167+
node.asVariable().getDefinition() = def
168+
or
169+
node.asCfgNode() = def.getDefiningNode()
170+
|
171+
def.getScope() = Value::named("tarfile.open").(CallableValue).getScope()
172+
or
173+
def.isSelf() and def.getScope().getEnclosingModule().getName() = "tarfile"
174+
)
175+
}
176+
}
177+
178+
from TarSlipConfiguration config, TaintedPathSource src, TaintedPathSink sink
179+
where config.hasFlowPath(src, sink)
180+
select sink.getSink(), src, sink, "Extraction of tarfile from $@", src.getSource(),
181+
"a potentially untrusted source"

0 commit comments

Comments
 (0)