CodingLi
diff --git a/‎python/ql/src/experimental/Security-old-dataflow/CWE-022/PathInjection.qhelp
Lines changed: 61 additions & 0 deletions b/‎python/ql/src/experimental/Security-old-dataflow/CWE-022/PathInjection.qhelp
Lines changed: 61 additions & 0 deletions
diff --git a/‎python/ql/src/experimental/Security-old-dataflow/CWE-022/PathInjection.ql
Lines changed: 48 additions & 0 deletions b/‎python/ql/src/experimental/Security-old-dataflow/CWE-022/PathInjection.ql
Lines changed: 48 additions & 0 deletions
diff --git a/‎python/ql/src/experimental/Security-old-dataflow/CWE-022/TarSlip.qhelp
Lines changed: 75 additions & 0 deletions b/‎python/ql/src/experimental/Security-old-dataflow/CWE-022/TarSlip.qhelp
Lines changed: 75 additions & 0 deletions
diff --git a/‎python/ql/src/experimental/Security-old-dataflow/CWE-022/TarSlip.ql
Lines changed: 181 additions & 0 deletions b/‎python/ql/src/experimental/Security-old-dataflow/CWE-022/TarSlip.ql
Lines changed: 181 additions & 0 deletions
@@ -0,0 +1,61 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>
+Accessing files using paths constructed from user-controlled data can allow an attacker to access
+unexpected resources. This can result in sensitive information being revealed or deleted, or an
+attacker being able to influence behavior by modifying unexpected files.
+</p>
+</overview>
+
+<recommendation>
+<p>
+Validate user input before using it to construct a file path, either using an off-the-shelf library function
+like <code>werkzeug.utils.secure_filename</code>, or by performing custom validation.
+</p>
+
+<p>
+Ideally, follow these rules:
+</p>
+
+<ul>
+<li>Do not allow more than a single "." character.</li>
+<li>Do not allow directory separators such as "/" or "\" (depending on the file system).</li>
+<li>Do not rely on simply replacing problematic sequences such as "../". For example, after
+applying this filter to ".../...//", the resulting string would still be "../".</li>
+<li>Use an allowlist of known good patterns.</li>
+</ul>
+</recommendation>
+
+<example>
+<p>
+In the first example, a file name is read from an HTTP request and then used to access a file.
+However, a malicious user could enter a file name that is an absolute path, such as
+<code>"/etc/passwd"</code>.
+</p>
+
+<p>
+In the second example, it appears that the user is restricted to opening a file within the
+<code>"/server/static/images"</code> directory. However, a malicious user could enter a file name containing
+special characters. For example, the string <code>"../../../etc/passwd"</code> will result in the code
+reading the file located at <code>"/server/static/images/../../../etc/passwd"</code>, which is the system's
+password file. This file would then be sent back to the user, giving them access to all the
+system's passwords.
+</p>
+
+<p>
+In the third example, the path used to access the file system is normalized <em>before</em> being checked against a
+known prefix. This ensures that regardless of the user input, the resulting path is safe.
+</p>
+
+<sample src="examples/tainted_path.py" />
+</example>
+
+<references>
+<li>OWASP: <a href="https://owasp.org/www-community/attacks/Path_Traversal">Path Traversal</a>.</li>
+<li>Werkzeug documentation: <a href="http://werkzeug.pocoo.org/docs/utils/#werkzeug.utils.secure_filename">werkzeug.utils.secure_filename</a>.</li>
+</references>
+</qhelp>
@@ -0,0 +1,48 @@
+/**
+ * @name Uncontrolled data used in path expression
+ * @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
+ * @kind path-problem
+ * @problem.severity error
+ * @sub-severity high
+ * @precision high
+ * @id py/path-injection
+ * @tags correctness
+ *       security
+ *       external/owasp/owasp-a1
+ *       external/cwe/cwe-022
+ *       external/cwe/cwe-023
+ *       external/cwe/cwe-036
+ *       external/cwe/cwe-073
+ *       external/cwe/cwe-099
+ */
+
+import python
+import semmle.python.security.Paths
+/* Sources */
+import semmle.python.web.HttpRequest
+/* Sinks */
+import semmle.python.security.injection.Path
+
+/**
+ * A taint-tracking configuration for the path injection query.
+ *
+ * Sources are `HttpRequestTaintSource`s and sinks are `OpenNode`s.
+ * `PathSanitizer` and `NormalizedPathSanitizer` are registered as sanitizers,
+ * and `AbsPath` is registered as a flow extension.
+ */
+class PathInjectionConfiguration extends TaintTracking::Configuration {
+  PathInjectionConfiguration() { this = "Path injection configuration" }
+
+  /** Holds if `source` is an HTTP request taint source. */
+  override predicate isSource(TaintTracking::Source source) {
+    source instanceof HttpRequestTaintSource
+  }
+
+  /** Holds if `sink` is an `OpenNode`. */
+  override predicate isSink(TaintTracking::Sink sink) { sink instanceof OpenNode }
+
+  /** Holds if `sanitizer` is one of the two path sanitizers used by this query. */
+  override predicate isSanitizer(Sanitizer sanitizer) {
+    sanitizer instanceof NormalizedPathSanitizer
+    or
+    sanitizer instanceof PathSanitizer
+  }
+
+  /** Holds if `extension` is an `AbsPath` flow extension. */
+  override predicate isExtension(TaintTracking::Extension extension) {
+    extension instanceof AbsPath
+  }
+}
+
+// Report every flow path found by the configuration. The alert is placed on the
+// sink node, and the `$@` placeholder in the message is filled from the source
+// node together with the link text "a user-provided value".
+from PathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
+where config.hasFlowPath(src, sink)
+select sink.getSink(), src, sink, "This path depends on $@.", src.getSource(),
+  "a user-provided value"
@@ -0,0 +1,75 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>Extracting files from a malicious tar archive without validating that the destination file path
+is within the destination directory can cause files outside the destination directory to be
+overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
+archive paths.</p>
+
+<p>Tar archives contain archive entries representing each file in the archive. These entries
+include a file path for the entry, but these file paths are not restricted and may contain
+unexpected special elements such as the directory traversal element (<code>..</code>). If these
+file paths are used to determine an output file to write the contents of the archive item to, then
+the file may be written to an unexpected location. This can result in sensitive information being
+revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
+files.</p>
+
+<p>For example, if a tar archive contains a file entry <code>..\sneaky-file</code>, and the tar archive
+is extracted to the directory <code>c:\output</code>, then naively combining the paths would result
+in an output file path of <code>c:\output\..\sneaky-file</code>, which would cause the file to be
+written to <code>c:\sneaky-file</code>.</p>
+
+</overview>
+<recommendation>
+
+<p>Ensure that output paths constructed from tar archive entries are validated
+to prevent writing files to unexpected locations.</p>
+
+<p>The recommended way of writing an output file from a tar archive entry is to check that
+<code>".."</code> does not occur in the path.
+</p>
+
+</recommendation>
+
+<example>
+<p>
+In this example an archive is extracted without validating file paths.
+If <code>archive.tar</code> contained relative paths (for
+instance, if it were created by something like <code>tar -cf archive.tar
+../file.txt</code>) then executing this code could write to locations
+outside the destination directory.
+</p>
+
+<sample src="examples/tarslip_bad.py" />
+
+<p>To fix this vulnerability, we need to check that the path does not
+contain any <code>".."</code> elements in it.
+</p>
+
+<sample src="examples/tarslip_good.py" />
+
+</example>
+<references>
+
+<li>
+Snyk:
+<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
+</li>
+<li>
+OWASP:
+<a href="https://owasp.org/www-community/attacks/Path_Traversal">Path Traversal</a>.
+</li>
+<li>
+Python Library Reference:
+<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extract">TarFile.extract</a>.
+</li>
+<li>
+Python Library Reference:
+<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall">TarFile.extractall</a>.
+</li>
+
+</references>
+</qhelp>
@@ -0,0 +1,181 @@
+/**
+ * @name Arbitrary file write during tarfile extraction
+ * @description Extracting files from a malicious tar archive without validating that the
+ *              destination file path is within the destination directory can cause files outside
+ *              the destination directory to be overwritten.
+ * @kind path-problem
+ * @id py/tarslip
+ * @problem.severity error
+ * @precision medium
+ * @tags security
+ *       external/cwe/cwe-022
+ */
+
+import python
+import semmle.python.security.Paths
+import semmle.python.dataflow.TaintTracking
+import semmle.python.security.strings.Basic
+
+/**
+ * A `TaintKind` representing open tarfile objects, that is, the result of
+ * calling `tarfile.open(...)`.
+ */
+class OpenTarFile extends TaintKind {
+  OpenTarFile() { this = "tarfile.open" }
+
+  /** Gets the class value named `tarfile.TarFile`. */
+  override ClassValue getType() { result = Value::named("tarfile.TarFile") }
+
+  /** Gets the taint kind produced by iterating over this kind: a `TarFileInfo`. */
+  override TaintKind getTaintForIteration() { result instanceof TarFileInfo }
+
+  /**
+   * Gets the taint kind of the result of calling method `name` on this kind:
+   * a `TarFileInfo` for `getmember`, and a sequence of `TarFileInfo` for `getmembers`.
+   */
+  override TaintKind getTaintOfMethodResult(string name) {
+    result.(SequenceKind).getItem() instanceof TarFileInfo and
+    name = "getmembers"
+    or
+    result instanceof TarFileInfo and
+    name = "getmember"
+  }
+}
+
+/** The source of open tarfile objects. That is, any call to `tarfile.open(...)` */
+class TarfileOpen extends TaintSource {
+  TarfileOpen() {
+    this = Value::named("tarfile.open").getACall() and
+    // An argument that points to a string object indicates a hardcoded path,
+    // in which case this tarfile is considered safe.
+    // NOTE(review): any argument is inspected here, not only the file name, so a
+    // string-valued argument in another position also excludes the call - confirm
+    // that this is intended.
+    not exists(StringValue str | this.(CallNode).getAnArg().pointsTo(str)) and
+    // Ignore opens within the tarfile module itself.
+    not this.(ControlFlowNode).getLocation().getFile().getBaseName() = "tarfile.py"
+  }
+
+  /** Holds if `kind` is the `OpenTarFile` taint kind. */
+  override predicate isSourceOf(TaintKind kind) { kind instanceof OpenTarFile }
+}
+
+/** A `TaintKind` identified by the string `tarfile.entry`. */
+class TarFileInfo extends TaintKind {
+  TarFileInfo() { this = "tarfile.entry" }
+
+  /** Gets the taint kind of attribute `name`: the `name` attribute is itself a `TarFileInfo`. */
+  override TaintKind getTaintOfAttribute(string name) {
+    result instanceof TarFileInfo and
+    name = "name"
+  }
+
+  /** Gets the taint kind of the result of calling method `name`: `next` yields this same kind. */
+  override TaintKind getTaintOfMethodResult(string name) {
+    result = this and
+    name = "next"
+  }
+}
+
+/**
+ * A sanitizer that stops tarfile-related taint at nodes located in `tarfile.py`.
+ *
+ * For efficiency we don't want to track the flow of taint around the tarfile module.
+ */
+class ExcludeTarFilePy extends Sanitizer {
+  ExcludeTarFilePy() { this = "Tar sanitizer" }
+
+  /**
+   * Holds if `node` is in a file whose base name is `tarfile.py` and `taint` is an
+   * `OpenTarFile`, a `TarFileInfo`, or a sequence of `TarFileInfo`.
+   */
+  override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) {
+    (
+      taint.(SequenceKind).getItem() instanceof TarFileInfo
+      or
+      taint instanceof TarFileInfo
+      or
+      taint instanceof OpenTarFile
+    ) and
+    node.getLocation().getFile().getBaseName() = "tarfile.py"
+  }
+}
+
+/**
+ * The receiver object of a call to an `extractall` method, where the call
+ * has no arguments.
+ */
+class ExtractAllSink extends TaintSink {
+  CallNode call;
+
+  ExtractAllSink() {
+    not exists(call.getAnArg()) and
+    this = call.getFunction().(AttrNode).getObject("extractall")
+  }
+
+  /** Holds if `kind` is the `OpenTarFile` taint kind. */
+  override predicate sinks(TaintKind kind) { kind instanceof OpenTarFile }
+}
+
+/** The first positional argument of a call to a method named `extract`. */
+class ExtractSink extends TaintSink {
+  CallNode call;
+
+  ExtractSink() {
+    this = call.getArg(0) and
+    call.getFunction().(AttrNode).getName() = "extract"
+  }
+
+  /** Holds if `kind` is the `TarFileInfo` taint kind. */
+  override predicate sinks(TaintKind kind) { kind instanceof TarFileInfo }
+}
+
+/**
+ * The members argument of a call to a method named `extractall`, given either
+ * as the first positional argument or as the keyword argument `members`.
+ */
+class ExtractMembersSink extends TaintSink {
+  CallNode call;
+
+  ExtractMembersSink() {
+    (
+      this = call.getArgByName("members")
+      or
+      this = call.getArg(0)
+    ) and
+    call.getFunction().(AttrNode).getName() = "extractall"
+  }
+
+  /** Holds if `kind` is an `OpenTarFile` or a sequence of `TarFileInfo`. */
+  override predicate sinks(TaintKind kind) {
+    kind instanceof OpenTarFile
+    or
+    kind.(SequenceKind).getItem() instanceof TarFileInfo
+  }
+}
+
+/**
+ * A sanitizer for `TarFileInfo` taint that is triggered by conditions
+ * satisfying `path_sanitizing_test`.
+ */
+class TarFileInfoSanitizer extends Sanitizer {
+  TarFileInfoSanitizer() { this = "TarInfo sanitizer" }
+
+  /** The test `if <path_sanitizing_test>:` clears taint on its `false` edge. */
+  override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
+    clears_taint_on_false_edge(test.getTest(), test.getSense()) and
+    taint instanceof TarFileInfo
+  }
+
+  /**
+   * Holds if `test` is a path-sanitizing test and `sense` is `false`, or if `test`
+   * is a `not` expression whose operand satisfies this predicate with the
+   * opposite `sense` (which also covers nested `not`s).
+   */
+  private predicate clears_taint_on_false_edge(ControlFlowNode test, boolean sense) {
+    exists(UnaryExprNode negation | negation = test |
+      negation.getNode().getOp() instanceof Not and
+      clears_taint_on_false_edge(negation.getOperand(), sense.booleanNot())
+    )
+    or
+    sense = false and
+    path_sanitizing_test(test)
+  }
+}
+
+/**
+ * Holds if `test` has a (transitive) child that is an attribute access or a
+ * name whose identifier ends in "path". Any such test is assumed to be a sanitizer.
+ */
+private predicate path_sanitizing_test(ControlFlowNode test) {
+  exists(ControlFlowNode descendant | descendant = test.getAChild+() |
+    descendant.(NameNode).getId().matches("%path")
+    or
+    descendant.(AttrNode).getName().matches("%path")
+  )
+}
+
+/**
+ * A taint-tracking configuration for the tarslip query.
+ *
+ * Sources are `TarfileOpen` calls; sinks are the three extraction sinks
+ * `ExtractSink`, `ExtractAllSink` and `ExtractMembersSink`.
+ */
+class TarSlipConfiguration extends TaintTracking::Configuration {
+  TarSlipConfiguration() { this = "TarSlip configuration" }
+
+  /** Holds if `source` is a `TarfileOpen`. */
+  override predicate isSource(TaintTracking::Source source) { source instanceof TarfileOpen }
+
+  /** Holds if `sink` is one of the extraction sinks. */
+  override predicate isSink(TaintTracking::Sink sink) {
+    sink instanceof ExtractAllSink
+    or
+    sink instanceof ExtractMembersSink
+    or
+    sink instanceof ExtractSink
+  }
+
+  /** Holds if `sanitizer` is one of the two sanitizers used by this query. */
+  override predicate isSanitizer(Sanitizer sanitizer) {
+    sanitizer instanceof ExcludeTarFilePy
+    or
+    sanitizer instanceof TarFileInfoSanitizer
+  }
+
+  /**
+   * Holds if `node` corresponds to a parameter definition that is either in the
+   * scope of `tarfile.open` or is a `self` parameter in the `tarfile` module.
+   */
+  override predicate isBarrier(DataFlow::Node node) {
+    // Avoid flow into the tarfile module
+    exists(ParameterDefinition param |
+      node.asCfgNode() = param.getDefiningNode()
+      or
+      node.asVariable().getDefinition() = param
+    |
+      param.isSelf() and param.getScope().getEnclosingModule().getName() = "tarfile"
+      or
+      param.getScope() = Value::named("tarfile.open").(CallableValue).getScope()
+    )
+  }
+}
+
+// Report every flow path found by the configuration. The alert is placed on the
+// sink node, and the `$@` placeholder in the message is filled from the source
+// node together with the link text "a potentially untrusted source".
+from TarSlipConfiguration config, TaintedPathSource src, TaintedPathSink sink
+where config.hasFlowPath(src, sink)
+select sink.getSink(), src, sink, "Extraction of tarfile from $@", src.getSource(),
+  "a potentially untrusted source"