Skip to content

Commit 603a3af

Browse files
author
Dave Bartolomeo
committed
C++: Treat implicit end of body of non-void function as Unreached
When the extractor can't prove that control flow will never reach the end of a non-`void`-returning function without reaching an explicit `return` statement, it inserts an implicit `return` without an operand. If control actually reaches this point, the behavior is undefined. We were previously generating invalid IR for these implicit `return` statements, because the lack of an operand meant that there was no definition of the return value variable along that path. Instead, I've changed the IR generation to emit an `Unreached` instruction for the implicit `return`. This ensures that we don't create a control flow edge from the end of the body to the function epilogue. The change to the range analysis test avoids having that test depend on the previous bad IR behavior, while still preserving the original spirit of the test.
1 parent 7c5c9ea commit 603a3af

File tree

7 files changed

+104
-10
lines changed

7 files changed

+104
-10
lines changed

cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedFunction.qll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ CppType getEllipsisVariablePRValueType() {
4949

5050
CppType getEllipsisVariableGLValueType() { result = getTypeForGLValue(any(UnknownType t)) }
5151

52+
/**
53+
* Holds if the function returns a value, as opposed to returning `void`.
54+
*/
55+
predicate hasReturnValue(Function func) { not func.getUnspecifiedType() instanceof VoidType }
56+
5257
/**
5358
* Represents the IR translation of a function. This is the root elements for
5459
* all other elements associated with this function.
@@ -312,7 +317,7 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
312317
/**
313318
* Holds if the function has a non-`void` return type.
314319
*/
315-
final predicate hasReturnValue() { not func.getUnspecifiedType() instanceof VoidType }
320+
final predicate hasReturnValue() { hasReturnValue(func) }
316321

317322
/**
318323
* Gets the single `UnmodeledDefinition` instruction for this function.

cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedStmt.qll

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ abstract class TranslatedReturnStmt extends TranslatedStmt {
131131
}
132132
}
133133

134+
/**
135+
* The IR translation of a `return` statement that returns a value.
136+
*/
134137
class TranslatedReturnValueStmt extends TranslatedReturnStmt, TranslatedVariableInitialization {
135138
TranslatedReturnValueStmt() { stmt.hasExpr() }
136139

@@ -147,8 +150,14 @@ class TranslatedReturnValueStmt extends TranslatedReturnStmt, TranslatedVariable
147150
final override IRVariable getIRVariable() { result = getEnclosingFunction().getReturnVariable() }
148151
}
149152

153+
/**
154+
* The IR translation of a `return` statement that does not return a value. This includes implicit
155+
* return statements at the end of `void`-returning functions.
156+
*/
150157
class TranslatedReturnVoidStmt extends TranslatedReturnStmt {
151-
TranslatedReturnVoidStmt() { not stmt.hasExpr() }
158+
TranslatedReturnVoidStmt() {
159+
not stmt.hasExpr() and not hasReturnValue(stmt.getEnclosingFunction())
160+
}
152161

153162
override TranslatedElement getChild(int id) { none() }
154163

@@ -169,6 +178,33 @@ class TranslatedReturnVoidStmt extends TranslatedReturnStmt {
169178
override Instruction getChildSuccessor(TranslatedElement child) { none() }
170179
}
171180

181+
/**
182+
* The IR translation of an implicit `return` statement generated by the extractor to handle control
183+
* flow that reaches the end of a non-`void`-returning function body. Since such control flow
184+
* produces undefined behavior, we simply generate an `Unreached` instruction to prevent that flow
185+
* from continuing on to pollute other analysis. The assumption is that the developer is certain
186+
* that the implicit `return` is unreachable, even if the compiler cannot prove it.
187+
*/
188+
class TranslatedUnreachableReturnStmt extends TranslatedReturnStmt {
189+
TranslatedUnreachableReturnStmt() {
190+
not stmt.hasExpr() and hasReturnValue(stmt.getEnclosingFunction())
191+
}
192+
193+
override TranslatedElement getChild(int id) { none() }
194+
195+
override Instruction getFirstInstruction() { result = getInstruction(OnlyInstructionTag()) }
196+
197+
override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
198+
tag = OnlyInstructionTag() and
199+
opcode instanceof Opcode::Unreached and
200+
resultType = getVoidType()
201+
}
202+
203+
override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) { none() }
204+
205+
override Instruction getChildSuccessor(TranslatedElement child) { none() }
206+
}
207+
172208
/**
173209
* The IR translation of a C++ `try` statement.
174210
*/

cpp/ql/test/library-tests/ir/ir/PrintAST.expected

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8750,6 +8750,23 @@ ir.cpp:
87508750
# 1286| Type = [PointerType] A *
87518751
# 1286| ValueCategory = prvalue
87528752
# 1287| 12: [ReturnStmt] return ...
8753+
# 1289| [TopLevelFunction] int missingReturnValue(bool, int)
8754+
# 1289| params:
8755+
# 1289| 0: [Parameter] b
8756+
# 1289| Type = [BoolType] bool
8757+
# 1289| 1: [Parameter] x
8758+
# 1289| Type = [IntType] int
8759+
# 1289| body: [Block] { ... }
8760+
# 1290| 0: [IfStmt] if (...) ...
8761+
# 1290| 0: [VariableAccess] b
8762+
# 1290| Type = [BoolType] bool
8763+
# 1290| ValueCategory = prvalue(load)
8764+
# 1290| 1: [Block] { ... }
8765+
# 1291| 0: [ReturnStmt] return ...
8766+
# 1291| 0: [VariableAccess] x
8767+
# 1291| Type = [IntType] int
8768+
# 1291| ValueCategory = prvalue(load)
8769+
# 1293| 1: [ReturnStmt] return ...
87538770
perf-regression.cpp:
87548771
# 4| [CopyAssignmentOperator] Big& Big::operator=(Big const&)
87558772
# 4| params:

cpp/ql/test/library-tests/ir/ir/ir.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,10 +1249,10 @@ char *strcpy(char *destination, const char *source);
12491249
char *strcat(char *destination, const char *source);
12501250

12511251
void test_strings(char *s1, char *s2) {
1252-
char buffer[1024] = {0};
1252+
char buffer[1024] = {0};
12531253

1254-
strcpy(buffer, s1);
1255-
strcat(buffer, s2);
1254+
strcpy(buffer, s1);
1255+
strcat(buffer, s2);
12561256
}
12571257

12581258
struct A {
@@ -1286,4 +1286,10 @@ void test_static_member_functions(int int_arg, A* a_arg) {
12861286
getAnInstanceOfA()->static_member_without_def();
12871287
}
12881288

1289+
int missingReturnValue(bool b, int x) {
1290+
if (b) {
1291+
return x;
1292+
}
1293+
}
1294+
12891295
// semmle-extractor-options: -std=c++17 --clang

cpp/ql/test/library-tests/ir/ir/raw_ir.expected

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6631,6 +6631,36 @@ ir.cpp:
66316631
# 1270| v1270_14(void) = AliasedUse : ~mu1270_4
66326632
# 1270| v1270_15(void) = ExitFunction :
66336633

6634+
# 1289| int missingReturnValue(bool, int)
6635+
# 1289| Block 0
6636+
# 1289| v1289_1(void) = EnterFunction :
6637+
# 1289| mu1289_2(unknown) = AliasedDefinition :
6638+
# 1289| mu1289_3(unknown) = InitializeNonLocal :
6639+
# 1289| mu1289_4(unknown) = UnmodeledDefinition :
6640+
# 1289| r1289_5(glval<bool>) = VariableAddress[b] :
6641+
# 1289| mu1289_6(bool) = InitializeParameter[b] : &:r1289_5
6642+
# 1289| r1289_7(glval<int>) = VariableAddress[x] :
6643+
# 1289| mu1289_8(int) = InitializeParameter[x] : &:r1289_7
6644+
# 1290| r1290_1(glval<bool>) = VariableAddress[b] :
6645+
# 1290| r1290_2(bool) = Load : &:r1290_1, ~mu1289_4
6646+
# 1290| v1290_3(void) = ConditionalBranch : r1290_2
6647+
#-----| False -> Block 1
6648+
#-----| True -> Block 2
6649+
6650+
# 1293| Block 1
6651+
# 1293| v1293_1(void) = Unreached :
6652+
6653+
# 1291| Block 2
6654+
# 1291| r1291_1(glval<int>) = VariableAddress[#return] :
6655+
# 1291| r1291_2(glval<int>) = VariableAddress[x] :
6656+
# 1291| r1291_3(int) = Load : &:r1291_2, ~mu1289_4
6657+
# 1291| mu1291_4(int) = Store : &:r1291_1, r1291_3
6658+
# 1289| r1289_9(glval<int>) = VariableAddress[#return] :
6659+
# 1289| v1289_10(void) = ReturnValue : &:r1289_9, ~mu1289_4
6660+
# 1289| v1289_11(void) = UnmodeledUse : mu*
6661+
# 1289| v1289_12(void) = AliasedUse : ~mu1289_4
6662+
# 1289| v1289_13(void) = ExitFunction :
6663+
66346664
perf-regression.cpp:
66356665
# 6| void Big::Big()
66366666
# 6| Block 0

cpp/ql/test/library-tests/rangeanalysis/rangeanalysis/RangeAnalysis.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
| test.cpp:97:10:97:10 | Load: x | file://:0:0:0:0 | 0 | 1 | false | CompareLT: ... < ... | test.cpp:94:7:94:11 | test.cpp:94:7:94:11 |
3636
| test.cpp:100:10:100:10 | Load: x | file://:0:0:0:0 | 0 | 1 | true | CompareLE: ... <= ... | test.cpp:99:7:99:12 | test.cpp:99:7:99:12 |
3737
| test.cpp:102:10:102:10 | Load: x | file://:0:0:0:0 | 0 | 2 | false | CompareLE: ... <= ... | test.cpp:99:7:99:12 | test.cpp:99:7:99:12 |
38-
| test.cpp:107:5:107:10 | Phi: test10 | test.cpp:114:3:114:6 | Phi: call to sink | -1 | true | CompareLT: ... < ... | test.cpp:115:18:115:22 | test.cpp:115:18:115:22 |
38+
| test.cpp:117:10:117:10 | Load: i | test.cpp:114:3:114:6 | Phi: call to sink | -1 | true | CompareLT: ... < ... | test.cpp:116:7:116:11 | test.cpp:116:7:116:11 |
3939
| test.cpp:130:10:130:10 | Load: i | file://:0:0:0:0 | 0 | 0 | false | NoReason | file://:0:0:0:0 | file://:0:0:0:0 |
4040
| test.cpp:140:10:140:10 | Store: i | file://:0:0:0:0 | 0 | 1 | false | NoReason | file://:0:0:0:0 | file://:0:0:0:0 |
4141
| test.cpp:140:10:140:10 | Store: i | test.cpp:135:16:135:16 | InitializeParameter: x | 0 | false | CompareLT: ... < ... | test.cpp:139:11:139:15 | test.cpp:139:11:139:15 |

cpp/ql/test/library-tests/rangeanalysis/rangeanalysis/test.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,17 +104,17 @@ void test9(int x) {
104104
}
105105

106106
// Phi nodes as bounds
107-
int test10(int y, int z, bool use_y) {
107+
void test10(int y, int z, bool use_y) {
108108
int x;
109109
if(use_y) {
110110
x = y;
111111
} else {
112112
x = z;
113113
}
114114
sink();
115-
for(int i = 0; i < x; i++) {
116-
return i;
117-
}
115+
int i = source();
116+
if (i < x)
117+
sink(i);
118118
}
119119

120120
// Irreducible CFGs

0 commit comments

Comments
 (0)