Skip to content

Commit 7165959

Browse files
committed
C++: Let data flow past definition by reference
This commit changes how data flow works in the following code. MyType x = source(); defineByReference(&x); sink(x); The question here is whether there should be flow from `source` to `sink`. Such flow is desirable if `defineByReference` doesn't write to all of `x`, but it's undesirable if `defineByReference` is a typical init function in `C` that writes to every field or if `defineByReference` is `memcpy` or `memset` on the full range. Before 1.20.0, there would be flow from `source` to `sink` in case `x` happened to be modeled with `BlockVar` but not in case `x` happened to be modelled with SSA. The choice of modelling depends on an analysis of how `x` is used elsewhere in the function, and it's supposed to be an internal implementation detail that there are two ways to model variables. In 1.20.0, I changed the `BlockVar` behavior so it worked the same as SSA, never allowing that flow. It turns out that this change broke a customer's query. This commit reverts `BlockVar` to its old behavior of letting flow propagate past the `defineByReference` call and then regains consistency by changing all variables that are ever defined by reference to be modelled with `BlockVar` instead of SSA. This means we now get too much flow in certain cases, but that appears to be better overall than getting too little flow. See also the discussion in CPP-336.
1 parent 52d8ca0 commit 7165959

File tree

7 files changed

+68
-22
lines changed

7 files changed

+68
-22
lines changed

cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,17 @@ module FlowVar_internal {
9393
* - Supporting fields, globals and statics like the Java SSA library does.
9494
* - Supporting all local variables, even if their address is taken by
9595
* address-of, reference assignments, or lambdas.
96+
* - Understanding that assignment to a field of a local struct is a
97+
* definition of the struct but not a complete overwrite. This is what the
98+
* IR library uses chi nodes for.
9699
*/
97100
predicate fullySupportedSsaVariable(Variable v) {
98101
v = any(SsaDefinition def).getAVariable() and
102+
// After `foo(&x.field)` we need for there to be two definitions of `x`:
103+
// the one that existed before the call to `foo` and the def-by-ref from
104+
// the call. It's fundamental in SSA that each use is only associated with
105+
// one def, so we can't easily get this effect with SSA.
106+
not definitionByReference(v.getAnAccess(), _) and
99107
// SSA variables do not exist before their first assignment, but one
100108
// feature of this data flow library is to track where uninitialized data
101109
// ends up.
@@ -183,8 +191,7 @@ module FlowVar_internal {
183191
}
184192

185193
override predicate definedByReference(Expr arg) {
186-
definitionByReference(v.getAnAccess(), arg) and
187-
arg = def.getDefinition()
194+
none() // Not supported for SSA. See `fullySupportedSsaVariable`.
188195
}
189196

190197
override predicate definedByInitialValue(LocalScopeVariable param) {
@@ -204,8 +211,6 @@ module FlowVar_internal {
204211
this.definedByExpr(_, _)
205212
or
206213
this.definedByInitialValue(_)
207-
or
208-
this.definedByReference(_)
209214
}
210215

211216
/**
@@ -236,10 +241,7 @@ module FlowVar_internal {
236241
BlockVar() { this = TBlockVar(sbb, v) }
237242

238243
override VariableAccess getAnAccess() {
239-
exists(SubBasicBlock reached |
240-
reached = getAReachedBlockVarSBB(this) and
241-
variableAccessInSBB(v, reached, result)
242-
)
244+
variableAccessInSBB(v, getAReachedBlockVarSBB(this), result)
243245
}
244246

245247
override predicate definedByInitialValue(LocalScopeVariable lsv) {
@@ -401,8 +403,7 @@ module FlowVar_internal {
401403
mid = getAReachedBlockVarSBB(start) and
402404
result = mid.getASuccessor() and
403405
not skipLoop(mid, result, sbbDef, v) and
404-
not assignmentLikeOperation(result, v, _) and
405-
not blockVarDefinedByReference(result, v, _)
406+
not assignmentLikeOperation(result, v, _)
406407
)
407408
}
408409

@@ -413,12 +414,6 @@ module FlowVar_internal {
413414
va.getTarget() = v and
414415
va = sbb.getANode() and
415416
not overwrite(va, _)
416-
or
417-
// Allow flow into a `VariableAccess` that is used as definition by
418-
// reference. This flow is blocked by `getAReachedBlockVarSBB` because
419-
// flow should not propagate past that.
420-
va = sbb.getASuccessor().(VariableAccess) and
421-
blockVarDefinedByReference(va, v, _)
422417
}
423418

424419
/**
@@ -516,9 +511,6 @@ module FlowVar_internal {
516511
*/
517512
predicate overwrite(VariableAccess va, ControlFlowNode node) {
518513
va = node.(AssignExpr).getLValue()
519-
or
520-
va = node and
521-
definitionByReference(node, _)
522514
}
523515

524516
/**

cpp/ql/test/library-tests/dataflow/dataflow-tests/localFlow.expected

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
| example.c:15:37:15:37 | b | example.c:19:6:19:6 | b |
22
| example.c:15:44:15:46 | pos | example.c:24:24:24:26 | pos |
33
| example.c:15:44:15:46 | pos | example.c:28:23:28:25 | pos |
4+
| example.c:17:19:17:22 | {...} | example.c:24:2:24:7 | coords |
45
| example.c:17:19:17:22 | {...} | example.c:24:13:24:18 | coords |
6+
| example.c:17:19:17:22 | {...} | example.c:26:2:26:7 | coords |
7+
| example.c:17:19:17:22 | {...} | example.c:26:19:26:24 | coords |
58
| example.c:24:13:24:30 | ... = ... | example.c:24:2:24:30 | ... = ... |
69
| example.c:24:24:24:30 | ... + ... | example.c:24:13:24:30 | ... = ... |
710
| example.c:26:13:26:16 | call to getX | example.c:26:2:26:25 | ... = ... |
811
| example.c:26:18:26:24 | ref arg & ... | example.c:26:2:26:7 | coords |
12+
| example.c:26:18:26:24 | ref arg & ... | example.c:26:19:26:24 | coords |
13+
| example.c:28:22:28:25 | ref arg & ... | example.c:28:23:28:25 | pos |
914
| test.cpp:6:12:6:17 | call to source | test.cpp:7:8:7:9 | t1 |
1015
| test.cpp:6:12:6:17 | call to source | test.cpp:8:8:8:9 | t1 |
1116
| test.cpp:6:12:6:17 | call to source | test.cpp:9:8:9:9 | t1 |
@@ -31,14 +36,22 @@
3136
| test.cpp:24:10:24:11 | t2 | test.cpp:26:8:26:9 | t1 |
3237
| test.cpp:430:48:430:54 | source1 | test.cpp:432:17:432:23 | source1 |
3338
| test.cpp:431:12:431:13 | 0 | test.cpp:432:11:432:13 | tmp |
39+
| test.cpp:431:12:431:13 | 0 | test.cpp:432:33:432:35 | tmp |
40+
| test.cpp:431:12:431:13 | 0 | test.cpp:433:8:433:10 | tmp |
3441
| test.cpp:432:10:432:13 | & ... | test.cpp:432:3:432:8 | call to memcpy |
42+
| test.cpp:432:10:432:13 | ref arg & ... | test.cpp:432:11:432:13 | tmp |
43+
| test.cpp:432:10:432:13 | ref arg & ... | test.cpp:432:33:432:35 | tmp |
3544
| test.cpp:432:10:432:13 | ref arg & ... | test.cpp:433:8:433:10 | tmp |
3645
| test.cpp:432:17:432:23 | source1 | test.cpp:432:10:432:13 | ref arg & ... |
3746
| test.cpp:436:53:436:59 | source1 | test.cpp:439:17:439:23 | source1 |
3847
| test.cpp:436:66:436:66 | b | test.cpp:441:7:441:7 | b |
3948
| test.cpp:437:12:437:13 | 0 | test.cpp:438:19:438:21 | tmp |
4049
| test.cpp:437:12:437:13 | 0 | test.cpp:439:11:439:13 | tmp |
50+
| test.cpp:437:12:437:13 | 0 | test.cpp:439:33:439:35 | tmp |
51+
| test.cpp:437:12:437:13 | 0 | test.cpp:440:8:440:10 | tmp |
52+
| test.cpp:437:12:437:13 | 0 | test.cpp:442:10:442:12 | tmp |
4153
| test.cpp:439:10:439:13 | & ... | test.cpp:439:3:439:8 | call to memcpy |
54+
| test.cpp:439:10:439:13 | ref arg & ... | test.cpp:439:11:439:13 | tmp |
4255
| test.cpp:439:10:439:13 | ref arg & ... | test.cpp:439:33:439:35 | tmp |
4356
| test.cpp:439:10:439:13 | ref arg & ... | test.cpp:440:8:440:10 | tmp |
4457
| test.cpp:439:10:439:13 | ref arg & ... | test.cpp:442:10:442:12 | tmp |

cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -443,17 +443,17 @@ void flowThroughMemcpy_blockvar_with_local_flow(int source1, int b) {
443443
}
444444
}
445445

446-
void cleanedByMemcpy_ssa(int clean1) {
446+
void cleanedByMemcpy_ssa(int clean1) { // currently modeled with BlockVar, not SSA
447447
int tmp;
448448
memcpy(&tmp, &clean1, sizeof tmp);
449-
sink(tmp); // clean
449+
sink(tmp); // clean [FALSE POSITIVE]
450450
}
451451

452452
void cleanedByMemcpy_blockvar(int clean1) {
453453
int tmp;
454454
int *capture = &tmp;
455455
memcpy(&tmp, &clean1, sizeof tmp);
456-
sink(tmp); // clean
456+
sink(tmp); // clean [FALSE POSITIVE]
457457
}
458458

459459
void intRefSource(int &ref_source);

cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,17 @@
3030
| test.cpp:433:8:433:10 | tmp | test.cpp:430:48:430:54 | source1 |
3131
| test.cpp:440:8:440:10 | tmp | test.cpp:436:53:436:59 | source1 |
3232
| test.cpp:442:10:442:12 | tmp | test.cpp:436:53:436:59 | source1 |
33+
| test.cpp:449:8:449:10 | tmp | test.cpp:447:7:447:9 | tmp |
34+
| test.cpp:456:8:456:10 | tmp | test.cpp:453:7:453:9 | tmp |
35+
| test.cpp:466:8:466:12 | local | test.cpp:464:7:464:11 | local |
3336
| test.cpp:466:8:466:12 | local | test.cpp:465:16:465:20 | ref arg local |
37+
| test.cpp:472:8:472:12 | local | test.cpp:470:7:470:11 | local |
3438
| test.cpp:472:8:472:12 | local | test.cpp:471:20:471:25 | ref arg & ... |
39+
| test.cpp:478:8:478:12 | local | test.cpp:476:7:476:11 | local |
3540
| test.cpp:478:8:478:12 | local | test.cpp:477:20:477:24 | ref arg local |
41+
| test.cpp:485:8:485:12 | local | test.cpp:483:7:483:11 | local |
3642
| test.cpp:485:8:485:12 | local | test.cpp:484:18:484:23 | ref arg & ... |
43+
| test.cpp:491:8:491:12 | local | test.cpp:489:7:489:11 | local |
3744
| test.cpp:491:8:491:12 | local | test.cpp:490:18:490:22 | ref arg local |
3845
| true_upon_entry.cpp:21:8:21:8 | x | true_upon_entry.cpp:17:11:17:16 | call to source |
3946
| true_upon_entry.cpp:29:8:29:8 | x | true_upon_entry.cpp:27:9:27:14 | call to source |

cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,17 @@
1111
| test.cpp:430:48:430:54 | test.cpp:433:8:433:10 | AST only |
1212
| test.cpp:436:53:436:59 | test.cpp:440:8:440:10 | AST only |
1313
| test.cpp:436:53:436:59 | test.cpp:442:10:442:12 | AST only |
14+
| test.cpp:447:7:447:9 | test.cpp:449:8:449:10 | AST only |
15+
| test.cpp:453:7:453:9 | test.cpp:456:8:456:10 | AST only |
16+
| test.cpp:464:7:464:11 | test.cpp:466:8:466:12 | AST only |
1417
| test.cpp:465:16:465:20 | test.cpp:466:8:466:12 | AST only |
18+
| test.cpp:470:7:470:11 | test.cpp:472:8:472:12 | AST only |
1519
| test.cpp:471:20:471:25 | test.cpp:472:8:472:12 | AST only |
20+
| test.cpp:476:7:476:11 | test.cpp:478:8:478:12 | AST only |
1621
| test.cpp:477:20:477:24 | test.cpp:478:8:478:12 | AST only |
22+
| test.cpp:483:7:483:11 | test.cpp:485:8:485:12 | AST only |
1723
| test.cpp:484:18:484:23 | test.cpp:485:8:485:12 | AST only |
24+
| test.cpp:489:7:489:11 | test.cpp:491:8:491:12 | AST only |
1825
| test.cpp:490:18:490:22 | test.cpp:491:8:491:12 | AST only |
1926
| true_upon_entry.cpp:9:11:9:16 | true_upon_entry.cpp:13:8:13:8 | IR only |
2027
| true_upon_entry.cpp:62:11:62:16 | true_upon_entry.cpp:66:8:66:8 | IR only |
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,20 @@
11
| test.cpp:75:7:75:8 | u1 | test.cpp:76:8:76:9 | u1 |
22
| test.cpp:83:7:83:8 | u2 | test.cpp:84:13:84:14 | u2 |
33
| test.cpp:83:7:83:8 | u2 | test.cpp:85:8:85:9 | u2 |
4+
| test.cpp:447:7:447:9 | tmp | test.cpp:448:11:448:13 | tmp |
5+
| test.cpp:447:7:447:9 | tmp | test.cpp:449:8:449:10 | tmp |
46
| test.cpp:453:7:453:9 | tmp | test.cpp:454:19:454:21 | tmp |
7+
| test.cpp:453:7:453:9 | tmp | test.cpp:455:11:455:13 | tmp |
8+
| test.cpp:453:7:453:9 | tmp | test.cpp:456:8:456:10 | tmp |
9+
| test.cpp:464:7:464:11 | local | test.cpp:465:16:465:20 | local |
10+
| test.cpp:464:7:464:11 | local | test.cpp:466:8:466:12 | local |
11+
| test.cpp:470:7:470:11 | local | test.cpp:471:21:471:25 | local |
12+
| test.cpp:470:7:470:11 | local | test.cpp:472:8:472:12 | local |
13+
| test.cpp:476:7:476:11 | local | test.cpp:477:20:477:24 | local |
14+
| test.cpp:476:7:476:11 | local | test.cpp:478:8:478:12 | local |
15+
| test.cpp:476:7:476:11 | local | test.cpp:479:9:479:13 | local |
16+
| test.cpp:483:7:483:11 | local | test.cpp:484:19:484:23 | local |
17+
| test.cpp:483:7:483:11 | local | test.cpp:485:8:485:12 | local |
18+
| test.cpp:489:7:489:11 | local | test.cpp:490:18:490:22 | local |
19+
| test.cpp:489:7:489:11 | local | test.cpp:491:8:491:12 | local |
20+
| test.cpp:489:7:489:11 | local | test.cpp:492:9:492:13 | local |

cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,13 +128,24 @@
128128
| taint.cpp:164:19:164:24 | call to source | taint.cpp:168:8:168:14 | tainted | |
129129
| taint.cpp:164:19:164:24 | call to source | taint.cpp:172:18:172:24 | tainted | |
130130
| taint.cpp:165:22:165:25 | {...} | taint.cpp:170:10:170:15 | buffer | |
131+
| taint.cpp:165:22:165:25 | {...} | taint.cpp:171:8:171:13 | buffer | |
132+
| taint.cpp:165:22:165:25 | {...} | taint.cpp:172:10:172:15 | buffer | |
133+
| taint.cpp:165:22:165:25 | {...} | taint.cpp:173:8:173:13 | buffer | |
131134
| taint.cpp:165:24:165:24 | 0 | taint.cpp:165:22:165:25 | {...} | TAINT |
132135
| taint.cpp:170:10:170:15 | buffer | taint.cpp:170:3:170:8 | call to strcpy | |
136+
| taint.cpp:170:10:170:15 | ref arg buffer | taint.cpp:170:10:170:15 | buffer | |
133137
| taint.cpp:170:10:170:15 | ref arg buffer | taint.cpp:171:8:171:13 | buffer | |
138+
| taint.cpp:170:10:170:15 | ref arg buffer | taint.cpp:172:10:172:15 | buffer | |
139+
| taint.cpp:170:10:170:15 | ref arg buffer | taint.cpp:173:8:173:13 | buffer | |
140+
| taint.cpp:171:8:171:13 | ref arg buffer | taint.cpp:171:8:171:13 | buffer | |
134141
| taint.cpp:171:8:171:13 | ref arg buffer | taint.cpp:172:10:172:15 | buffer | |
142+
| taint.cpp:171:8:171:13 | ref arg buffer | taint.cpp:173:8:173:13 | buffer | |
135143
| taint.cpp:172:10:172:15 | buffer | taint.cpp:172:3:172:8 | call to strcat | |
144+
| taint.cpp:172:10:172:15 | ref arg buffer | taint.cpp:172:10:172:15 | buffer | |
136145
| taint.cpp:172:10:172:15 | ref arg buffer | taint.cpp:173:8:173:13 | buffer | |
146+
| taint.cpp:173:8:173:13 | ref arg buffer | taint.cpp:173:8:173:13 | buffer | |
137147
| taint.cpp:180:19:180:19 | p | taint.cpp:181:9:181:9 | p | |
138148
| taint.cpp:181:9:181:9 | p | taint.cpp:181:8:181:9 | * ... | TAINT |
139149
| taint.cpp:185:11:185:16 | call to source | taint.cpp:186:11:186:11 | x | |
150+
| taint.cpp:186:10:186:11 | ref arg & ... | taint.cpp:186:11:186:11 | x | |
140151
| taint.cpp:186:11:186:11 | x | taint.cpp:186:10:186:11 | & ... | TAINT |

0 commit comments

Comments
 (0)