Skip to content

Commit cb527ca

Browse files
authored
Merge pull request github#4583 from tausbn/python-test-2
Python: Promote experimental queries
2 parents b78234f + 8147ad4 commit cb527ca

File tree

65 files changed

+578
-855
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+578
-855
lines changed

python/ql/src/Security/CWE-022/PathInjection.ql

Lines changed: 87 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,35 +14,103 @@
1414
* external/cwe/cwe-036
1515
* external/cwe/cwe-073
1616
* external/cwe/cwe-099
17+
*
18+
* The query detects cases where a user-controlled path is used in an unsafe manner,
19+
* meaning it is not both normalized and _afterwards_ checked.
20+
*
21+
* It does so by dividing the problematic situation into two cases:
22+
* 1. The file path is never normalized.
23+
* This is easily detected by using normalization as a sanitizer.
24+
*
25+
* 2. The file path is normalized at least once, but never checked afterwards.
26+
* This is detected by finding the earliest normalization and then ensuring that
27+
* no checks happen later. Since we start from the earliest normalization,
28+
* we know that the absence of checks means that no normalization has a
29+
* check after it. (No checks after a second normalization would be ok if
30+
* there was a check between the first and the second.)
31+
*
32+
* Note that one could make the dual split on whether the file path is ever checked. This does
33+
* not work as nicely, however, since checking is modelled as a `BarrierGuard` rather than
34+
* as a `Sanitizer`. That means that only some dataflow paths out of a check will be removed,
35+
* and so identifying the last check is not possible simply by finding a dataflow path from it
36+
* to a sink.
1737
*/
1838

1939
import python
20-
import semmle.python.security.Paths
21-
/* Sources */
22-
import semmle.python.web.HttpRequest
23-
/* Sinks */
24-
import semmle.python.security.injection.Path
40+
import experimental.dataflow.DataFlow
41+
import experimental.dataflow.DataFlow2
42+
import experimental.dataflow.TaintTracking
43+
import experimental.dataflow.TaintTracking2
44+
import experimental.semmle.python.Concepts
45+
import experimental.dataflow.RemoteFlowSources
46+
import ChainedConfigs12
2547

26-
class PathInjectionConfiguration extends TaintTracking::Configuration {
27-
PathInjectionConfiguration() { this = "Path injection configuration" }
48+
// ---------------------------------------------------------------------------
49+
// Case 1. The path is never normalized.
50+
// ---------------------------------------------------------------------------
51+
/** Configuration to find paths from sources to sinks that contain no normalization. */
52+
class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
53+
PathNotNormalizedConfiguration() { this = "PathNotNormalizedConfiguration" }
2854

29-
override predicate isSource(TaintTracking::Source source) {
30-
source instanceof HttpRequestTaintSource
55+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
56+
57+
override predicate isSink(DataFlow::Node sink) {
58+
sink = any(FileSystemAccess e).getAPathArgument()
3159
}
3260

33-
override predicate isSink(TaintTracking::Sink sink) { sink instanceof OpenNode }
61+
override predicate isSanitizer(DataFlow::Node node) { node instanceof Path::PathNormalization }
62+
}
63+
64+
predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
65+
any(PathNotNormalizedConfiguration config).hasFlowPath(source.asNode1(), sink.asNode1())
66+
}
67+
68+
// ---------------------------------------------------------------------------
69+
// Case 2. The path is normalized at least once, but never checked afterwards.
70+
// ---------------------------------------------------------------------------
71+
/** Configuration to find paths from sources to normalizations that contain no prior normalizations. */
72+
class FirstNormalizationConfiguration extends TaintTracking::Configuration {
73+
FirstNormalizationConfiguration() { this = "FirstNormalizationConfiguration" }
74+
75+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
76+
77+
override predicate isSink(DataFlow::Node sink) { sink instanceof Path::PathNormalization }
3478

35-
override predicate isSanitizer(Sanitizer sanitizer) {
36-
sanitizer instanceof PathSanitizer or
37-
sanitizer instanceof NormalizedPathSanitizer
79+
override predicate isSanitizerOut(DataFlow::Node node) { node instanceof Path::PathNormalization }
80+
}
81+
82+
/** Configuration to find paths from normalizations to sinks that do not go through a check. */
83+
class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuration {
84+
NormalizedPathNotCheckedConfiguration() { this = "NormalizedPathNotCheckedConfiguration" }
85+
86+
override predicate isSource(DataFlow::Node source) { source instanceof Path::PathNormalization }
87+
88+
override predicate isSink(DataFlow::Node sink) {
89+
sink = any(FileSystemAccess e).getAPathArgument()
3890
}
3991

40-
override predicate isExtension(TaintTracking::Extension extension) {
41-
extension instanceof AbsPath
92+
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
93+
guard instanceof Path::SafeAccessCheck
4294
}
4395
}
4496

45-
from PathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
46-
where config.hasFlowPath(src, sink)
47-
select sink.getSink(), src, sink, "This path depends on $@.", src.getSource(),
48-
"a user-provided value"
97+
predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode sink) {
98+
exists(
99+
FirstNormalizationConfiguration config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
100+
NormalizedPathNotCheckedConfiguration config2
101+
|
102+
config.hasFlowPath(source.asNode1(), mid1) and
103+
config2.hasFlowPath(mid2, sink.asNode2()) and
104+
mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
105+
)
106+
}
107+
108+
// ---------------------------------------------------------------------------
109+
// Query: Either case 1 or case 2.
110+
// ---------------------------------------------------------------------------
111+
from CustomPathNode source, CustomPathNode sink
112+
where
113+
pathNotNormalized(source, sink)
114+
or
115+
pathNotCheckedAfterNormalization(source, sink)
116+
select sink, source, sink, "This path depends on $@.", source, "a user-provided value"

python/ql/src/Security/CWE-078/CommandInjection.ql

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,29 +15,51 @@
1515
*/
1616

1717
import python
18-
import semmle.python.security.Paths
19-
/* Sources */
20-
import semmle.python.web.HttpRequest
21-
/* Sinks */
22-
import semmle.python.security.injection.Command
18+
import experimental.dataflow.DataFlow
19+
import experimental.dataflow.TaintTracking
20+
import experimental.semmle.python.Concepts
21+
import experimental.dataflow.RemoteFlowSources
22+
import DataFlow::PathGraph
2323

2424
class CommandInjectionConfiguration extends TaintTracking::Configuration {
25-
CommandInjectionConfiguration() { this = "Command injection configuration" }
25+
CommandInjectionConfiguration() { this = "CommandInjectionConfiguration" }
2626

27-
override predicate isSource(TaintTracking::Source source) {
28-
source instanceof HttpRequestTaintSource
29-
}
30-
31-
override predicate isSink(TaintTracking::Sink sink) { sink instanceof CommandSink }
27+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
3228

33-
override predicate isExtension(TaintTracking::Extension extension) {
34-
extension instanceof FirstElementFlow
35-
or
36-
extension instanceof FabricExecuteExtension
29+
override predicate isSink(DataFlow::Node sink) {
30+
sink = any(SystemCommandExecution e).getCommand() and
31+
// Since the implementation of standard library functions such `os.popen` looks like
32+
// ```py
33+
// def popen(cmd, mode="r", buffering=-1):
34+
// ...
35+
// proc = subprocess.Popen(cmd, ...)
36+
// ```
37+
// any time we would report flow to the `os.popen` sink, we can ALSO report the flow
38+
// from the `cmd` parameter to the `subprocess.Popen` sink -- obviously we don't
39+
// want that.
40+
//
41+
// However, simply removing taint edges out of a sink is not a good enough solution,
42+
// since we would only flag one of the `os.system` calls in the following example
43+
// due to use-use flow
44+
// ```py
45+
// os.system(cmd)
46+
// os.system(cmd)
47+
// ```
48+
//
49+
// Best solution I could come up with is to exclude all sinks inside the modules of
50+
// known sinks. This does have a downside: If we have overlooked a function in any
51+
// of these, that internally runs a command, we no longer give an alert :| -- and we
52+
// need to keep them updated (which is hard to remember)
53+
//
54+
// This does not only affect `os.popen`, but also the helper functions in
55+
// `subprocess`. See:
56+
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
57+
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
58+
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess", "platform", "popen2"]
3759
}
3860
}
3961

40-
from CommandInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
41-
where config.hasFlowPath(src, sink)
42-
select sink.getSink(), src, sink, "This command depends on $@.", src.getSource(),
62+
from CommandInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
63+
where config.hasFlowPath(source, sink)
64+
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
4365
"a user-provided value"

python/ql/src/Security/CWE-079/ReflectedXss.ql

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,30 +13,26 @@
1313
*/
1414

1515
import python
16-
import semmle.python.security.Paths
17-
/* Sources */
18-
import semmle.python.web.HttpRequest
19-
/* Sinks */
20-
import semmle.python.web.HttpResponse
21-
/* Flow */
22-
import semmle.python.security.strings.Untrusted
16+
import experimental.dataflow.DataFlow
17+
import experimental.dataflow.TaintTracking
18+
import experimental.semmle.python.Concepts
19+
import experimental.dataflow.RemoteFlowSources
20+
import DataFlow::PathGraph
2321

2422
class ReflectedXssConfiguration extends TaintTracking::Configuration {
25-
ReflectedXssConfiguration() { this = "Reflected XSS configuration" }
23+
ReflectedXssConfiguration() { this = "ReflectedXssConfiguration" }
2624

27-
override predicate isSource(TaintTracking::Source source) {
28-
source instanceof HttpRequestTaintSource
29-
}
25+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
3026

31-
override predicate isSink(TaintTracking::Sink sink) {
32-
sink instanceof HttpResponseTaintSink and
33-
not sink instanceof DjangoResponseContent
34-
or
35-
sink instanceof DjangoResponseContentXSSVulnerable
27+
override predicate isSink(DataFlow::Node sink) {
28+
exists(HTTP::Server::HttpResponse response |
29+
response.getMimetype().toLowerCase() = "text/html" and
30+
sink = response.getBody()
31+
)
3632
}
3733
}
3834

39-
from ReflectedXssConfiguration config, TaintedPathSource src, TaintedPathSink sink
40-
where config.hasFlowPath(src, sink)
41-
select sink.getSink(), src, sink, "Cross-site scripting vulnerability due to $@.", src.getSource(),
42-
"a user-provided value"
35+
from ReflectedXssConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
36+
where config.hasFlowPath(source, sink)
37+
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
38+
source.getNode(), "a user-provided value"

python/ql/src/Security/CWE-089/SqlInjection.ql

100755100644
Lines changed: 11 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -12,40 +12,21 @@
1212
*/
1313

1414
import python
15-
import semmle.python.security.Paths
16-
/* Sources */
17-
import semmle.python.web.HttpRequest
18-
/* Sinks */
19-
import semmle.python.security.injection.Sql
20-
import semmle.python.web.django.Db
21-
import semmle.python.web.django.Model
15+
import experimental.dataflow.DataFlow
16+
import experimental.dataflow.TaintTracking
17+
import experimental.semmle.python.Concepts
18+
import experimental.dataflow.RemoteFlowSources
19+
import DataFlow::PathGraph
2220

2321
class SQLInjectionConfiguration extends TaintTracking::Configuration {
24-
SQLInjectionConfiguration() { this = "SQL injection configuration" }
22+
SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" }
2523

26-
override predicate isSource(TaintTracking::Source source) {
27-
source instanceof HttpRequestTaintSource
28-
}
24+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
2925

30-
override predicate isSink(TaintTracking::Sink sink) { sink instanceof SqlInjectionSink }
26+
override predicate isSink(DataFlow::Node sink) { sink = any(SqlExecution e).getSql() }
3127
}
3228

33-
/*
34-
* Additional configuration to support tracking of DB objects. Connections, cursors, etc.
35-
* Without this configuration (or the LegacyConfiguration), the pattern of
36-
* `any(MyTaintKind k).taints(control_flow_node)` used in DbConnectionExecuteArgument would not work.
37-
*/
38-
39-
class DbConfiguration extends TaintTracking::Configuration {
40-
DbConfiguration() { this = "DB configuration" }
41-
42-
override predicate isSource(TaintTracking::Source source) {
43-
source instanceof DjangoModelObjects or
44-
source instanceof DbConnectionSource
45-
}
46-
}
47-
48-
from SQLInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
49-
where config.hasFlowPath(src, sink)
50-
select sink.getSink(), src, sink, "This SQL query depends on $@.", src.getSource(),
29+
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
30+
where config.hasFlowPath(source, sink)
31+
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
5132
"a user-provided value"
Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* @name Code injection
3-
* @description Interpreting unsanitized user input as code allows a malicious user arbitrary
3+
* @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
44
* code execution.
55
* @kind path-problem
66
* @problem.severity error
@@ -15,23 +15,21 @@
1515
*/
1616

1717
import python
18-
import semmle.python.security.Paths
19-
/* Sources */
20-
import semmle.python.web.HttpRequest
21-
/* Sinks */
22-
import semmle.python.security.injection.Exec
18+
import experimental.dataflow.DataFlow
19+
import experimental.dataflow.TaintTracking
20+
import experimental.semmle.python.Concepts
21+
import experimental.dataflow.RemoteFlowSources
22+
import DataFlow::PathGraph
2323

2424
class CodeInjectionConfiguration extends TaintTracking::Configuration {
25-
CodeInjectionConfiguration() { this = "Code injection configuration" }
25+
CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }
2626

27-
override predicate isSource(TaintTracking::Source source) {
28-
source instanceof HttpRequestTaintSource
29-
}
27+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
3028

31-
override predicate isSink(TaintTracking::Sink sink) { sink instanceof StringEvaluationNode }
29+
override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
3230
}
3331

34-
from CodeInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
35-
where config.hasFlowPath(src, sink)
36-
select sink.getSink(), src, sink, "$@ flows to here and is interpreted as code.", src.getSource(),
37-
"A user-provided value"
32+
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
33+
where config.hasFlowPath(source, sink)
34+
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
35+
source.getNode(), "A user-provided value"

python/ql/src/Security/CWE-502/UnsafeDeserialization.ql

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,25 @@
1212
*/
1313

1414
import python
15-
import semmle.python.security.Paths
16-
// Sources -- Any untrusted input
17-
import semmle.python.web.HttpRequest
18-
// Flow -- untrusted string
19-
import semmle.python.security.strings.Untrusted
20-
// Sink -- Unpickling and other deserialization formats.
21-
import semmle.python.security.injection.Pickle
22-
import semmle.python.security.injection.Marshal
23-
import semmle.python.security.injection.Yaml
15+
import experimental.dataflow.DataFlow
16+
import experimental.dataflow.TaintTracking
17+
import experimental.semmle.python.Concepts
18+
import experimental.dataflow.RemoteFlowSources
19+
import DataFlow::PathGraph
2420

2521
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
26-
UnsafeDeserializationConfiguration() { this = "Unsafe deserialization configuration" }
22+
UnsafeDeserializationConfiguration() { this = "UnsafeDeserializationConfiguration" }
2723

28-
override predicate isSource(TaintTracking::Source source) {
29-
source instanceof HttpRequestTaintSource
30-
}
24+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
3125

32-
override predicate isSink(TaintTracking::Sink sink) { sink instanceof DeserializationSink }
26+
override predicate isSink(DataFlow::Node sink) {
27+
exists(Decoding d |
28+
d.mayExecuteInput() and
29+
sink = d.getAnInput()
30+
)
31+
}
3332
}
3433

35-
from UnsafeDeserializationConfiguration config, TaintedPathSource src, TaintedPathSink sink
36-
where config.hasFlowPath(src, sink)
37-
select sink.getSink(), src, sink, "Deserializing of $@.", src.getSource(), "untrusted input"
34+
from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
35+
where config.hasFlowPath(source, sink)
36+
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"

0 commit comments

Comments
 (0)