Skip to content

Commit 4c7a500

Browse files
authored
Merge pull request github#3314 from RasmusWL/python-model-stdlib-http.server
Approved by tausbn
2 parents 8ed8d3c + 2b30252 commit 4c7a500

File tree

11 files changed

+393
-0
lines changed

11 files changed

+393
-0
lines changed

python/ql/src/semmle/python/web/HttpRequest.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ import semmle.python.web.bottle.Request
77
import semmle.python.web.turbogears.Request
88
import semmle.python.web.falcon.Request
99
import semmle.python.web.cherrypy.Request
10+
import semmle.python.web.stdlib.Request

python/ql/src/semmle/python/web/HttpResponse.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ import semmle.python.web.bottle.Response
77
import semmle.python.web.turbogears.Response
88
import semmle.python.web.falcon.Response
99
import semmle.python.web.cherrypy.Response
10+
import semmle.python.web.stdlib.Response
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/**
2+
* Provides the sources and taint-flow for HTTP servers defined using the standard library (stdlib).
3+
* Specifically, we model `HttpRequestTaintSource`s from instances of `BaseHTTPRequestHandler`
4+
* (or subclasses) and form parsing using `cgi.FieldStorage`.
5+
*/
6+
import python
7+
import semmle.python.security.TaintTracking
8+
import semmle.python.web.Http
9+
10+
/** A taint source for control-flow nodes that point to an instance of a class with `BaseHTTPRequestHandler` as a (transitive) base type. */
11+
class StdLibRequestSource extends HttpRequestTaintSource {
12+
StdLibRequestSource() {
13+
exists(ClassValue cls |
14+
cls.getABaseType+() = Value::named("BaseHTTPServer.BaseHTTPRequestHandler") // Python 2
15+
or
16+
cls.getABaseType+() = Value::named("http.server.BaseHTTPRequestHandler") // Python 3
17+
|
18+
this.(ControlFlowNode).pointsTo().getClass() = cls
19+
)
20+
}
21+
22+
override predicate isSourceOf(TaintKind kind) { kind instanceof BaseHTTPRequestHandlerKind }
23+
}
24+
25+
/** The taint kind for an instance of `BaseHTTPRequestHandler`; taint propagates to its `requestline`, `path`, `headers` and `rfile` attributes. */
26+
class BaseHTTPRequestHandlerKind extends TaintKind {
27+
BaseHTTPRequestHandlerKind() { this = "BaseHTTPRequestHandlerKind" }
28+
29+
override TaintKind getTaintOfAttribute(string name) {
30+
name in ["requestline", "path"] and
31+
result instanceof ExternalStringKind
32+
or
33+
name = "headers" and
34+
result instanceof HTTPMessageKind
35+
or
36+
name = "rfile" and
37+
result instanceof ExternalFileObject
38+
}
39+
}
40+
41+
/** The taint kind for request headers (an instance of `HTTPMessage`), modeled as a dictionary of externally controlled strings with extra steps for `get_all`, `as_bytes`, `as_string` and string conversion. */
42+
class HTTPMessageKind extends ExternalStringDictKind {
43+
override TaintKind getTaintOfMethodResult(string name) {
44+
result = super.getTaintOfMethodResult(name)
45+
or
46+
name = "get_all" and
47+
result.(SequenceKind).getItem() = this.getValue() // a sequence whose items have this dictionary's value kind
48+
or
49+
name in ["as_bytes", "as_string"] and
50+
result instanceof ExternalStringKind
51+
}
52+
53+
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
54+
result = super.getTaintForFlowStep(fromnode, tonode)
55+
or
56+
exists(ClassValue cls | cls = ClassValue::unicode() or cls = ClassValue::bytes() |
57+
tonode = cls.getACall() and
58+
tonode.(CallNode).getArg(0) = fromnode and
59+
result instanceof ExternalStringKind // passing the headers as first argument to the unicode/bytes class yields a tainted string
60+
)
61+
}
62+
}
63+
64+
/** A taint source for parsed HTTP forms: any call to `cgi.FieldStorage`. */
65+
class CgiFieldStorageSource extends HttpRequestTaintSource {
66+
CgiFieldStorageSource() { this = Value::named("cgi.FieldStorage").getACall() }
67+
68+
override predicate isSourceOf(TaintKind kind) { kind instanceof CgiFieldStorageFormKind }
69+
}
70+
71+
/** The taint kind for a parsed HTTP form; subscripting it yields a field (or a sequence of fields), and `getvalue`, `getfirst` and `getlist` yield externally controlled strings or sequences of them. */
72+
class CgiFieldStorageFormKind extends TaintKind {
73+
/*
74+
* There is a slight difference between how we model forms/fields and how they are handled by the code.
75+
* In the code
76+
* ```
77+
* form = cgi.FieldStorage()
78+
* field = form['myfield']
79+
* ```
80+
* both `form` and `field` have the type `cgi.FieldStorage`. This allows the code to represent
81+
* nested forms as `form['nested_form']['myfield']`. However, since HTML forms can't be nested,
82+
* we ignore that detail, since doing so allows for a cleaner model.
83+
*/
84+
CgiFieldStorageFormKind() { this = "CgiFieldStorageFormKind" }
85+
86+
override TaintKind getTaintOfAttribute(string name) {
87+
name = "value" and result.(SequenceKind).getItem() instanceof CgiFieldStorageFieldKind
88+
}
89+
90+
override TaintKind getTaintOfMethodResult(string name) {
91+
name = "getvalue" and
92+
(
93+
result instanceof ExternalStringKind
94+
or
95+
result.(SequenceKind).getItem() instanceof ExternalStringKind
96+
)
97+
or
98+
name = "getfirst" and
99+
result instanceof ExternalStringKind
100+
or
101+
name = "getlist" and
102+
result.(SequenceKind).getItem() instanceof ExternalStringKind
103+
}
104+
105+
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
106+
tonode.(SubscriptNode).getObject() = fromnode and
107+
(
108+
result instanceof CgiFieldStorageFieldKind
109+
or
110+
result.(SequenceKind).getItem() instanceof CgiFieldStorageFieldKind
111+
)
112+
}
113+
}
114+
115+
/** The taint kind for a single field of a parsed HTTP form; its `filename` and `value` attributes are externally controlled strings and its `file` attribute is an external file object. */
116+
class CgiFieldStorageFieldKind extends TaintKind {
117+
CgiFieldStorageFieldKind() { this = "CgiFieldStorageFieldKind" }
118+
119+
override TaintKind getTaintOfAttribute(string name) {
120+
name in ["filename", "value"] and result instanceof ExternalStringKind
121+
or
122+
name = "file" and result instanceof ExternalFileObject
123+
}
124+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/**
2+
* Provides the sinks for HTTP servers defined using the standard library (stdlib).
3+
*/
4+
5+
import python
6+
import semmle.python.security.TaintTracking
7+
import semmle.python.web.Http
8+
9+
/** Holds if `wfile` is an access to the `wfile` attribute of an object that points to an instance of a class with `BaseHTTPRequestHandler` as a (transitive) base type. */
private predicate is_wfile(AttrNode wfile) {
10+
exists(ClassValue cls |
11+
// Python 2
12+
cls.getABaseType+() = Value::named("BaseHTTPServer.BaseHTTPRequestHandler")
13+
or
14+
// Python 3
15+
cls.getABaseType+() = Value::named("http.server.BaseHTTPRequestHandler")
16+
|
17+
wfile.getObject("wfile").pointsTo().getClass() = cls
18+
)
19+
}
20+
21+
/** A sink for the first argument of `h.wfile.write(...)`, where `h` is an instance of a `BaseHTTPRequestHandler` subclass; it accepts externally controlled strings. */
22+
class StdLibWFileWriteSink extends HttpResponseTaintSink {
23+
StdLibWFileWriteSink() {
24+
exists(CallNode call |
25+
is_wfile(call.getFunction().(AttrNode).getObject("write")) and
26+
call.getArg(0) = this
27+
)
28+
}
29+
30+
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
31+
}
32+
33+
/** A sink for the first argument of `h.wfile.writelines(...)`, where `h` is an instance of a `BaseHTTPRequestHandler` subclass; it accepts sequences of externally controlled strings. */
34+
class StdLibWFileWritelinesSink extends HttpResponseTaintSink {
35+
StdLibWFileWritelinesSink() {
36+
exists(CallNode call |
37+
is_wfile(call.getFunction().(AttrNode).getObject("writelines")) and
38+
call.getArg(0) = this
39+
)
40+
}
41+
42+
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringSequenceKind }
43+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
| test.py:72:26:72:58 | Taint sink | externally controlled string |
2+
| test.py:73:31:73:54 | Taint sink | [externally controlled string] |
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import python
2+
import semmle.python.web.HttpResponse
3+
import semmle.python.security.strings.Untrusted
4+
5+
// Lists every HTTP response taint sink together with each taint kind it accepts.
from HttpResponseTaintSink sink, TaintKind kind
6+
where sink.sinks(kind)
7+
select sink, kind
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
| test.py:18:13:18:16 | self | BaseHTTPRequestHandlerKind |
2+
| test.py:20:13:20:16 | self | BaseHTTPRequestHandlerKind |
3+
| test.py:22:13:22:16 | self | BaseHTTPRequestHandlerKind |
4+
| test.py:24:13:24:16 | self | BaseHTTPRequestHandlerKind |
5+
| test.py:25:13:25:16 | self | BaseHTTPRequestHandlerKind |
6+
| test.py:26:13:26:16 | self | BaseHTTPRequestHandlerKind |
7+
| test.py:27:13:27:16 | self | BaseHTTPRequestHandlerKind |
8+
| test.py:28:13:28:16 | self | BaseHTTPRequestHandlerKind |
9+
| test.py:29:13:29:16 | self | BaseHTTPRequestHandlerKind |
10+
| test.py:30:13:30:16 | self | BaseHTTPRequestHandlerKind |
11+
| test.py:31:13:31:16 | self | BaseHTTPRequestHandlerKind |
12+
| test.py:32:13:32:16 | self | BaseHTTPRequestHandlerKind |
13+
| test.py:33:17:33:20 | self | BaseHTTPRequestHandlerKind |
14+
| test.py:34:19:34:22 | self | BaseHTTPRequestHandlerKind |
15+
| test.py:36:13:36:16 | self | BaseHTTPRequestHandlerKind |
16+
| test.py:37:13:37:16 | self | BaseHTTPRequestHandlerKind |
17+
| test.py:40:16:44:9 | Attribute() | CgiFieldStorageFormKind |
18+
| test.py:41:13:41:16 | self | BaseHTTPRequestHandlerKind |
19+
| test.py:42:13:42:16 | self | BaseHTTPRequestHandlerKind |
20+
| test.py:43:64:43:67 | self | BaseHTTPRequestHandlerKind |
21+
| test.py:69:9:69:12 | self | BaseHTTPRequestHandlerKind |
22+
| test.py:70:9:70:12 | self | BaseHTTPRequestHandlerKind |
23+
| test.py:71:9:71:12 | self | BaseHTTPRequestHandlerKind |
24+
| test.py:72:9:72:12 | self | BaseHTTPRequestHandlerKind |
25+
| test.py:73:9:73:12 | self | BaseHTTPRequestHandlerKind |
26+
| test.py:74:15:74:18 | self | BaseHTTPRequestHandlerKind |
27+
| test.py:78:16:82:9 | Attribute() | CgiFieldStorageFormKind |
28+
| test.py:79:13:79:16 | self | BaseHTTPRequestHandlerKind |
29+
| test.py:80:13:80:16 | self | BaseHTTPRequestHandlerKind |
30+
| test.py:81:64:81:67 | self | BaseHTTPRequestHandlerKind |
31+
| test.py:85:13:85:16 | self | BaseHTTPRequestHandlerKind |
32+
| test.py:86:13:86:16 | self | BaseHTTPRequestHandlerKind |
33+
| test.py:96:9:96:12 | self | BaseHTTPRequestHandlerKind |
34+
| test.py:97:9:97:12 | self | BaseHTTPRequestHandlerKind |
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import python
2+
import semmle.python.web.HttpRequest
3+
import semmle.python.security.strings.Untrusted
4+
5+
// Lists every HTTP request taint source together with each taint kind it produces.
from HttpRequestTaintSource source, TaintKind kind
6+
where
7+
source.isSourceOf(kind) and
8+
source.getLocation().getFile().getShortName() != "cgi.py" // NOTE(review): presumably filters out sources located inside the standard library's own `cgi.py` — confirm
9+
select source.(ControlFlowNode).getNode(), kind
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
| test.py:18 | ok | taint_sources | self | BaseHTTPRequestHandlerKind |
2+
| test.py:20 | ok | taint_sources | Attribute | externally controlled string |
3+
| test.py:22 | ok | taint_sources | Attribute | externally controlled string |
4+
| test.py:24 | ok | taint_sources | Attribute | {externally controlled string} |
5+
| test.py:25 | ok | taint_sources | Subscript | externally controlled string |
6+
| test.py:26 | ok | taint_sources | Attribute() | externally controlled string |
7+
| test.py:27 | ok | taint_sources | Attribute() | [externally controlled string] |
8+
| test.py:28 | fail | taint_sources | Attribute() | <NO TAINT> |
9+
| test.py:29 | ok | taint_sources | Attribute() | [externally controlled string] |
10+
| test.py:30 | fail | taint_sources | Attribute() | <NO TAINT> |
11+
| test.py:31 | ok | taint_sources | Attribute() | externally controlled string |
12+
| test.py:32 | ok | taint_sources | Attribute() | externally controlled string |
13+
| test.py:33 | ok | taint_sources | str() | externally controlled string |
14+
| test.py:34 | ok | taint_sources | bytes() | externally controlled string |
15+
| test.py:36 | ok | taint_sources | Attribute | file[externally controlled string] |
16+
| test.py:37 | ok | taint_sources | Attribute() | externally controlled string |
17+
| test.py:47 | ok | taint_sources | form | CgiFieldStorageFormKind |
18+
| test.py:49 | ok | taint_sources | Subscript | CgiFieldStorageFieldKind |
19+
| test.py:49 | ok | taint_sources | Subscript | [CgiFieldStorageFieldKind] |
20+
| test.py:50 | ok | taint_sources | Attribute | externally controlled string |
21+
| test.py:51 | ok | taint_sources | Attribute | file[externally controlled string] |
22+
| test.py:52 | ok | taint_sources | Attribute | externally controlled string |
23+
| test.py:53 | ok | taint_sources | Subscript | CgiFieldStorageFieldKind |
24+
| test.py:54 | ok | taint_sources | Attribute | externally controlled string |
25+
| test.py:55 | ok | taint_sources | Attribute | file[externally controlled string] |
26+
| test.py:56 | ok | taint_sources | Attribute | externally controlled string |
27+
| test.py:58 | ok | taint_sources | Attribute() | [externally controlled string] |
28+
| test.py:58 | ok | taint_sources | Attribute() | externally controlled string |
29+
| test.py:59 | ok | taint_sources | Subscript | externally controlled string |
30+
| test.py:61 | ok | taint_sources | Attribute() | externally controlled string |
31+
| test.py:63 | ok | taint_sources | Attribute() | [externally controlled string] |
32+
| test.py:64 | ok | taint_sources | Subscript | externally controlled string |
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import python
2+
import semmle.python.security.TaintTracking
3+
import semmle.python.web.HttpRequest
4+
import semmle.python.security.strings.Untrusted
5+
6+
// For each argument of a call to `ensure_tainted` or `ensure_not_tainted` in `test.py`,
// reports whether the observed taint matches the expectation, along with the taint kind found.
from
7+
Call call, Expr arg, boolean expected_taint, boolean has_taint, string test_res,
8+
string taint_string
9+
where
10+
call.getLocation().getFile().getShortName() = "test.py" and
11+
(
12+
call.getFunc().(Name).getId() = "ensure_tainted" and
13+
expected_taint = true
14+
or
15+
call.getFunc().(Name).getId() = "ensure_not_tainted" and
16+
expected_taint = false
17+
) and
18+
arg = call.getAnArg() and
19+
(
20+
not exists(TaintedNode tainted | tainted.getAstNode() = arg) and // no tainted node for this argument
21+
taint_string = "<NO TAINT>" and
22+
has_taint = false
23+
or
24+
exists(TaintedNode tainted | tainted.getAstNode() = arg | // one result row per taint kind reaching the argument
25+
taint_string = tainted.getTaintKind().toString()
26+
) and
27+
has_taint = true
28+
) and
29+
if expected_taint = has_taint then test_res = "ok " else test_res = "fail"
30+
// if expected_taint = has_taint then test_res = "✓" else test_res = "✕"
31+
select arg.getLocation().toString(), test_res, call.getScope().(Function).getName(), arg.toString(),
32+
taint_string

0 commit comments

Comments
 (0)