Merge pull request github#1912 from jbj/tainttracking-ir-1

rdmarsh2 · web-flow · commit e71a39f6b65c · 2019-09-11T13:44:39.000-07:00
C++: Stub replacement for security.TaintTracking
diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll
@@ -0,0 +1,164 @@
+import cpp
+import semmle.code.cpp.security.Security
+private import semmle.code.cpp.ir.dataflow.DataFlow
+private import semmle.code.cpp.ir.IR
+
+/**
+ * A predictable expression is one where an external user can predict
+ * the value. For example, a literal in the source code is considered
+ * predictable.
+ */
+// TODO: Change to use Instruction instead of Expr. Naive attempt breaks
+// TaintedAllocationSize qltest.
+private predicate predictable(Expr expr) {
+  expr instanceof Literal
+  or
+  exists(BinaryOperation binop | binop = expr |
+    predictable(binop.getLeftOperand()) and predictable(binop.getRightOperand())
+  )
+  or
+  exists(UnaryOperation unop | unop = expr | predictable(unop.getOperand()))
+}
+
+// TODO: remove when `predictable` has an `Instruction` parameter instead of `Expr`.
+private predicate predictableInstruction(Instruction instr) {
+  exists(DataFlow::Node node |
+    node.asInstruction() = instr and
+    predictable(node.asExpr())
+  )
+}
+
+private class DefaultTaintTrackingCfg extends DataFlow::Configuration {
+  DefaultTaintTrackingCfg() { this = "DefaultTaintTrackingCfg" }
+
+  override predicate isSource(DataFlow::Node source) { isUserInput(source.asExpr(), _) }
+
+  override predicate isSink(DataFlow::Node sink) { any() }
+
+  override predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
+    instructionTaintStep(n1.asInstruction(), n2.asInstruction())
+  }
+
+  override predicate isBarrier(DataFlow::Node node) {
+    exists(Variable checkedVar |
+      accessesVariable(node.asInstruction(), checkedVar) and
+      hasUpperBoundsCheck(checkedVar)
+    )
+  }
+}
+
+private predicate accessesVariable(CopyInstruction copy, Variable var) {
+  exists(VariableAddressInstruction va |
+    va.getVariable().getAST() = var
+  |
+    copy.(StoreInstruction).getDestinationAddress() = va
+    or
+    copy.(LoadInstruction).getSourceAddress() = va
+  )
+}
+
+/**
+ * A variable that has any kind of upper-bound check anywhere in the program
+ */
+// TODO: This coarse overapproximation, ported from the old taint tracking
+// library, could be replaced with an actual semantic check that a particular
+// variable _access_ is guarded by an upper-bound check. We probably don't want
+// to do this right away since it could expose a lot of FPs that were
+// previously suppressed by this predicate by coincidence.
+private predicate hasUpperBoundsCheck(Variable var) {
+  exists(RelationalOperation oper, VariableAccess access |
+    oper.getLeftOperand() = access and
+    access.getTarget() = var and
+    // Comparing to 0 is not an upper bound check
+    not oper.getRightOperand().getValue() = "0"
+  )
+}
+
+private predicate instructionTaintStep(Instruction i1, Instruction i2) {
+  // Expressions computed from tainted data are also tainted
+  i2 = any(CallInstruction call |
+      isPureFunction(call.getStaticCallTarget().getName()) and
+      call.getAnArgument() = i1 and
+      forall(Instruction arg | arg = call.getAnArgument() | arg = i1 or predictableInstruction(arg)) and
+      // flow through `strlen` tends to cause dubious results, if the length is
+      // bounded.
+      not call.getStaticCallTarget().getName() = "strlen"
+    )
+  or
+  // Flow through pointer dereference
+  i2.(LoadInstruction).getSourceAddress() = i1
+  or
+  i2.(UnaryInstruction).getUnary() = i1
+  or
+  exists(BinaryInstruction bin |
+    bin = i2 and
+    predictableInstruction(i2.getAnOperand().getDef()) and
+    i1 = i2.getAnOperand().getDef()
+  )
+  // TODO: Check that we have flow from `a` to `a[i]`. It may work for constant
+  // `i` because there is flow through `predictable` `BinaryInstruction` and
+  // through `LoadInstruction`.
+  //
+  // TODO: Flow from argument to return of known functions: Port missing parts
+  // of `returnArgument` to the `interfaces.Taint` and `interfaces.DataFlow`
+  // libraries.
+  //
+  // TODO: Flow from input argument to output argument of known functions: Port
+  // missing parts of `copyValueBetweenArguments` to the `interfaces.Taint` and
+  // `interfaces.DataFlow` libraries and implement call side-effect nodes. This
+  // will help with the test for `ExecTainted.ql`. The test for
+  // `TaintedPath.ql` is more tricky because the output arg is a pointer
+  // addition expression.
+}
+
+predicate tainted(Expr source, Element tainted) {
+  exists(DefaultTaintTrackingCfg cfg, DataFlow::Node sink |
+    cfg.hasFlow(DataFlow::exprNode(source), sink)
+  |
+    // TODO: is it more appropriate to use asConvertedExpr here and avoid
+    // `getConversion*`? Or will that cause us to miss some cases where there's
+    // flow to a conversion (like a `ReferenceDereferenceExpr`) and we want to
+    // pretend there was flow to the converted `Expr` for the sake of
+    // compatibility.
+    sink.asExpr().getConversion*() = tainted
+    or
+    // For compatibility, send flow from arguments to parameters, even for
+    // functions with no body.
+    exists(FunctionCall call, int i |
+      sink.asExpr() = call.getArgument(i) and
+      tainted = resolveCall(call).getParameter(i)
+    )
+    or
+    // For compatibility, send flow into a `Variable` if there is flow to any
+    // Load or Store of that variable.
+    exists(CopyInstruction copy |
+      copy.getSourceValue() = sink.asInstruction() and
+      accessesVariable(copy, tainted) and
+      not hasUpperBoundsCheck(tainted)
+    )
+    or
+    // For compatibility, send flow into a `NotExpr` even if it's part of a
+    // short-circuiting condition and thus might get skipped.
+    tainted.(NotExpr).getOperand() = sink.asExpr()
+  )
+}
+
+predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
+  tainted(source, tainted) and
+  // TODO: Find a way to emulate how `security.TaintTracking` reports the last
+  // global variable that taint has passed through. Also make sure we emulate
+  // its behavior for interprocedural flow through globals.
+  globalVar = ""
+}
+
+GlobalOrNamespaceVariable globalVarFromId(string id) {
+  // TODO: Implement this when `taintedIncludingGlobalVars` has support for
+  // global variables.
+  none()
+}
+
+Function resolveCall(Call call) {
+  // TODO: improve virtual dispatch. This will help in the test for
+  // `UncontrolledProcessOperation.ql`.
+  result = call.getTarget()
+}
diff --git a/cpp/ql/src/semmle/code/cpp/security/TaintTracking.qll b/cpp/ql/src/semmle/code/cpp/security/TaintTracking.qll
@@ -330,13 +330,7 @@ GlobalOrNamespaceVariable globalVarFromId(string id) {
  * A variable that has any kind of upper-bound check anywhere in the program
  */
 private predicate hasUpperBoundsCheck(Variable var) {
-  exists(BinaryOperation oper, VariableAccess access |
-    (
-      oper.getOperator() = "<" or
-      oper.getOperator() = "<=" or
-      oper.getOperator() = ">" or
-      oper.getOperator() = ">="
-    ) and
+  exists(RelationalOperation oper, VariableAccess access |
     oper.getLeftOperand() = access and
     access.getTarget() = var and
     // Comparing to 0 is not an upper bound check