Skip to content

Commit 346a007

Browse files
authored
Merge pull request github#4720 from RasmusWL/python-better-open-models
Python: better models of `open` function
2 parents 028a72b + e2c4af3 commit 346a007

File tree

3 files changed

+207
-137
lines changed

3 files changed

+207
-137
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Modeling of file system access has been improved to recognize `io.open` and `builtins.open`.

python/ql/src/semmle/python/frameworks/Stdlib.qll

Lines changed: 192 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -651,7 +651,7 @@ private module Stdlib {
651651
* WARNING: Only holds for a few predefined attributes.
652652
*/
653653
private DataFlow::Node builtins_attr(DataFlow::TypeTracker t, string attr_name) {
654-
attr_name in ["exec", "eval", "compile"] and
654+
attr_name in ["exec", "eval", "compile", "open"] and
655655
(
656656
t.start() and
657657
result = DataFlow::importNode(["builtins", "__builtin__"] + "." + attr_name)
@@ -728,165 +728,222 @@ private module Stdlib {
728728
)
729729
}
730730
}
731-
}
732731

733-
/**
734-
* An exec statement (only Python 2).
735-
* Se ehttps://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
736-
*/
737-
private class ExecStatement extends CodeExecution::Range {
738-
ExecStatement() {
739-
// since there are no DataFlow::Nodes for a Statement, we can't do anything like
740-
// `this = any(Exec exec)`
741-
this.asExpr() = any(Exec exec).getBody()
742-
}
732+
/**
733+
* A call to the builtin `open` function.
734+
* See https://docs.python.org/3/library/functions.html#open
735+
*/
736+
private class OpenCall extends FileSystemAccess::Range, DataFlow::CfgNode {
737+
override CallNode node;
743738

744-
override DataFlow::Node getCode() { result = this }
745-
}
739+
OpenCall() {
740+
node.getFunction() = builtins_attr("open").asCfgNode()
741+
or
742+
node.getFunction() = io_attr("open").asCfgNode()
743+
}
746744

747-
/**
748-
* A call to the builtin `open` function.
749-
* See https://docs.python.org/3/library/functions.html#open
750-
*/
751-
private class OpenCall extends FileSystemAccess::Range, DataFlow::CfgNode {
752-
override CallNode node;
745+
override DataFlow::Node getAPathArgument() {
746+
result.asCfgNode() in [node.getArg(0), node.getArgByName("file")]
747+
}
748+
}
753749

754-
OpenCall() { node.getFunction().(NameNode).getId() = "open" }
750+
/**
751+
* An exec statement (only Python 2).
752+
* Se ehttps://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
753+
*/
754+
private class ExecStatement extends CodeExecution::Range {
755+
ExecStatement() {
756+
// since there are no DataFlow::Nodes for a Statement, we can't do anything like
757+
// `this = any(Exec exec)`
758+
this.asExpr() = any(Exec exec).getBody()
759+
}
755760

756-
override DataFlow::Node getAPathArgument() {
757-
result.asCfgNode() in [node.getArg(0), node.getArgByName("file")]
761+
override DataFlow::Node getCode() { result = this }
758762
}
759-
}
760763

761-
// ---------------------------------------------------------------------------
762-
// base64
763-
// ---------------------------------------------------------------------------
764-
/** Gets a reference to the `base64` module. */
765-
private DataFlow::Node base64(DataFlow::TypeTracker t) {
766-
t.start() and
767-
result = DataFlow::importNode("base64")
768-
or
769-
exists(DataFlow::TypeTracker t2 | result = base64(t2).track(t2, t))
770-
}
764+
// ---------------------------------------------------------------------------
765+
// base64
766+
// ---------------------------------------------------------------------------
767+
/** Gets a reference to the `base64` module. */
768+
private DataFlow::Node base64(DataFlow::TypeTracker t) {
769+
t.start() and
770+
result = DataFlow::importNode("base64")
771+
or
772+
exists(DataFlow::TypeTracker t2 | result = base64(t2).track(t2, t))
773+
}
771774

772-
/** Gets a reference to the `base64` module. */
773-
DataFlow::Node base64() { result = base64(DataFlow::TypeTracker::end()) }
775+
/** Gets a reference to the `base64` module. */
776+
DataFlow::Node base64() { result = base64(DataFlow::TypeTracker::end()) }
774777

775-
/**
776-
* Gets a reference to the attribute `attr_name` of the `base64` module.
777-
* WARNING: Only holds for a few predefined attributes.
778-
*/
779-
private DataFlow::Node base64_attr(DataFlow::TypeTracker t, string attr_name) {
780-
attr_name in [
781-
"b64encode", "b64decode", "standard_b64encode", "standard_b64decode", "urlsafe_b64encode",
782-
"urlsafe_b64decode", "b32encode", "b32decode", "b16encode", "b16decode", "encodestring",
783-
"decodestring", "a85encode", "a85decode", "b85encode", "b85decode", "encodebytes",
784-
"decodebytes"
785-
] and
786-
(
787-
t.start() and
788-
result = DataFlow::importNode("base64" + "." + attr_name)
778+
/**
779+
* Gets a reference to the attribute `attr_name` of the `base64` module.
780+
* WARNING: Only holds for a few predefined attributes.
781+
*/
782+
private DataFlow::Node base64_attr(DataFlow::TypeTracker t, string attr_name) {
783+
attr_name in [
784+
"b64encode", "b64decode", "standard_b64encode", "standard_b64decode", "urlsafe_b64encode",
785+
"urlsafe_b64decode", "b32encode", "b32decode", "b16encode", "b16decode", "encodestring",
786+
"decodestring", "a85encode", "a85decode", "b85encode", "b85decode", "encodebytes",
787+
"decodebytes"
788+
] and
789+
(
790+
t.start() and
791+
result = DataFlow::importNode("base64" + "." + attr_name)
792+
or
793+
t.startInAttr(attr_name) and
794+
result = base64()
795+
)
789796
or
790-
t.startInAttr(attr_name) and
791-
result = base64()
792-
)
793-
or
794-
// Due to bad performance when using normal setup with `base64_attr(t2, attr_name).track(t2, t)`
795-
// we have inlined that code and forced a join
796-
exists(DataFlow::TypeTracker t2 |
797-
exists(DataFlow::StepSummary summary |
798-
base64_attr_first_join(t2, attr_name, result, summary) and
799-
t = t2.append(summary)
797+
// Due to bad performance when using normal setup with `base64_attr(t2, attr_name).track(t2, t)`
798+
// we have inlined that code and forced a join
799+
exists(DataFlow::TypeTracker t2 |
800+
exists(DataFlow::StepSummary summary |
801+
base64_attr_first_join(t2, attr_name, result, summary) and
802+
t = t2.append(summary)
803+
)
800804
)
801-
)
802-
}
803-
804-
pragma[nomagic]
805-
private predicate base64_attr_first_join(
806-
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
807-
) {
808-
DataFlow::StepSummary::step(base64_attr(t2, attr_name), res, summary)
809-
}
805+
}
810806

811-
/**
812-
* Gets a reference to the attribute `attr_name` of the `base64` module.
813-
* WARNING: Only holds for a few predefined attributes.
814-
*/
815-
private DataFlow::Node base64_attr(string attr_name) {
816-
result = base64_attr(DataFlow::TypeTracker::end(), attr_name)
817-
}
807+
pragma[nomagic]
808+
private predicate base64_attr_first_join(
809+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
810+
) {
811+
DataFlow::StepSummary::step(base64_attr(t2, attr_name), res, summary)
812+
}
818813

819-
/** A call to any of the encode functions in the `base64` module. */
820-
private class Base64EncodeCall extends Encoding::Range, DataFlow::CfgNode {
821-
override CallNode node;
822-
823-
Base64EncodeCall() {
824-
exists(string name |
825-
name in [
826-
"b64encode", "standard_b64encode", "urlsafe_b64encode", "b32encode", "b16encode",
827-
"encodestring", "a85encode", "b85encode", "encodebytes"
828-
] and
829-
node.getFunction() = base64_attr(name).asCfgNode()
830-
)
814+
/**
815+
* Gets a reference to the attribute `attr_name` of the `base64` module.
816+
* WARNING: Only holds for a few predefined attributes.
817+
*/
818+
private DataFlow::Node base64_attr(string attr_name) {
819+
result = base64_attr(DataFlow::TypeTracker::end(), attr_name)
831820
}
832821

833-
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
822+
/** A call to any of the encode functions in the `base64` module. */
823+
private class Base64EncodeCall extends Encoding::Range, DataFlow::CfgNode {
824+
override CallNode node;
834825

835-
override DataFlow::Node getOutput() { result = this }
826+
Base64EncodeCall() {
827+
exists(string name |
828+
name in [
829+
"b64encode", "standard_b64encode", "urlsafe_b64encode", "b32encode", "b16encode",
830+
"encodestring", "a85encode", "b85encode", "encodebytes"
831+
] and
832+
node.getFunction() = base64_attr(name).asCfgNode()
833+
)
834+
}
836835

837-
override string getFormat() {
838-
exists(string name | node.getFunction() = base64_attr(name).asCfgNode() |
839-
name in [
840-
"b64encode", "standard_b64encode", "urlsafe_b64encode", "encodestring", "encodebytes"
841-
] and
842-
result = "Base64"
843-
or
844-
name = "b32encode" and result = "Base32"
845-
or
846-
name = "b16encode" and result = "Base16"
847-
or
848-
name = "a85encode" and result = "Ascii85"
849-
or
850-
name = "b85encode" and result = "Base85"
851-
)
852-
}
853-
}
836+
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
854837

855-
/** A call to any of the decode functions in the `base64` module. */
856-
private class Base64DecodeCall extends Decoding::Range, DataFlow::CfgNode {
857-
override CallNode node;
858-
859-
Base64DecodeCall() {
860-
exists(string name |
861-
name in [
862-
"b64decode", "standard_b64decode", "urlsafe_b64decode", "b32decode", "b16decode",
863-
"decodestring", "a85decode", "b85decode", "decodebytes"
864-
] and
865-
node.getFunction() = base64_attr(name).asCfgNode()
866-
)
838+
override DataFlow::Node getOutput() { result = this }
839+
840+
override string getFormat() {
841+
exists(string name | node.getFunction() = base64_attr(name).asCfgNode() |
842+
name in [
843+
"b64encode", "standard_b64encode", "urlsafe_b64encode", "encodestring", "encodebytes"
844+
] and
845+
result = "Base64"
846+
or
847+
name = "b32encode" and result = "Base32"
848+
or
849+
name = "b16encode" and result = "Base16"
850+
or
851+
name = "a85encode" and result = "Ascii85"
852+
or
853+
name = "b85encode" and result = "Base85"
854+
)
855+
}
867856
}
868857

869-
override predicate mayExecuteInput() { none() }
858+
/** A call to any of the decode functions in the `base64` module. */
859+
private class Base64DecodeCall extends Decoding::Range, DataFlow::CfgNode {
860+
override CallNode node;
861+
862+
Base64DecodeCall() {
863+
exists(string name |
864+
name in [
865+
"b64decode", "standard_b64decode", "urlsafe_b64decode", "b32decode", "b16decode",
866+
"decodestring", "a85decode", "b85decode", "decodebytes"
867+
] and
868+
node.getFunction() = base64_attr(name).asCfgNode()
869+
)
870+
}
870871

871-
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
872+
override predicate mayExecuteInput() { none() }
872873

873-
override DataFlow::Node getOutput() { result = this }
874+
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
874875

875-
override string getFormat() {
876-
exists(string name | node.getFunction() = base64_attr(name).asCfgNode() |
877-
name in [
878-
"b64decode", "standard_b64decode", "urlsafe_b64decode", "decodestring", "decodebytes"
879-
] and
880-
result = "Base64"
881-
or
882-
name = "b32decode" and result = "Base32"
883-
or
884-
name = "b16decode" and result = "Base16"
885-
or
886-
name = "a85decode" and result = "Ascii85"
876+
override DataFlow::Node getOutput() { result = this }
877+
878+
override string getFormat() {
879+
exists(string name | node.getFunction() = base64_attr(name).asCfgNode() |
880+
name in [
881+
"b64decode", "standard_b64decode", "urlsafe_b64decode", "decodestring", "decodebytes"
882+
] and
883+
result = "Base64"
884+
or
885+
name = "b32decode" and result = "Base32"
886+
or
887+
name = "b16decode" and result = "Base16"
888+
or
889+
name = "a85decode" and result = "Ascii85"
890+
or
891+
name = "b85decode" and result = "Base85"
892+
)
893+
}
894+
}
895+
896+
// ---------------------------------------------------------------------------
897+
// io
898+
// ---------------------------------------------------------------------------
899+
/** Gets a reference to the `io` module. */
900+
private DataFlow::Node io(DataFlow::TypeTracker t) {
901+
t.start() and
902+
result = DataFlow::importNode("io")
903+
or
904+
exists(DataFlow::TypeTracker t2 | result = io(t2).track(t2, t))
905+
}
906+
907+
/** Gets a reference to the `io` module. */
908+
DataFlow::Node io() { result = io(DataFlow::TypeTracker::end()) }
909+
910+
/**
911+
* Gets a reference to the attribute `attr_name` of the `io` module.
912+
* WARNING: Only holds for a few predefined attributes.
913+
*/
914+
private DataFlow::Node io_attr(DataFlow::TypeTracker t, string attr_name) {
915+
attr_name in ["open"] and
916+
(
917+
t.start() and
918+
result = DataFlow::importNode("io" + "." + attr_name)
887919
or
888-
name = "b85decode" and result = "Base85"
920+
t.startInAttr(attr_name) and
921+
result = io()
889922
)
923+
or
924+
// Due to bad performance when using normal setup with `io_attr(t2, attr_name).track(t2, t)`
925+
// we have inlined that code and forced a join
926+
exists(DataFlow::TypeTracker t2 |
927+
exists(DataFlow::StepSummary summary |
928+
io_attr_first_join(t2, attr_name, result, summary) and
929+
t = t2.append(summary)
930+
)
931+
)
932+
}
933+
934+
pragma[nomagic]
935+
private predicate io_attr_first_join(
936+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
937+
) {
938+
DataFlow::StepSummary::step(io_attr(t2, attr_name), res, summary)
939+
}
940+
941+
/**
942+
* Gets a reference to the attribute `attr_name` of the `io` module.
943+
* WARNING: Only holds for a few predefined attributes.
944+
*/
945+
private DataFlow::Node io_attr(string attr_name) {
946+
result = io_attr(DataFlow::TypeTracker::end(), attr_name)
890947
}
891948
}
892949

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,18 @@
1+
import builtins
2+
import io
3+
14
open("filepath") # $getAPathArgument="filepath"
25
open(file="filepath") # $getAPathArgument="filepath"
36

47
o = open
58

6-
o("filepath") # $ MISSING: getAPathArgument="filepath"
7-
o(file="filepath") # $ MISSING: getAPathArgument="filepath"
9+
o("filepath") # $getAPathArgument="filepath"
10+
o(file="filepath") # $getAPathArgument="filepath"
11+
12+
13+
builtins.open("filepath") # $getAPathArgument="filepath"
14+
builtins.open(file="filepath") # $getAPathArgument="filepath"
15+
16+
17+
io.open("filepath") # $getAPathArgument="filepath"
18+
io.open(file="filepath") # $getAPathArgument="filepath"

0 commit comments

Comments
 (0)