Skip to content

Commit 2bbc614

Browse files
authored
[InstCombine] Support offsets in memset to load forwarding (#151924)
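Adds support for forwarding a `memset` value to a load whose address is at a constant, non-negative offset from the `memset` destination (same base pointer), instead of requiring the two addresses to be equivalent.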
1 parent d72e58e commit 2bbc614

File tree

3 files changed

+46
-19
lines changed

3 files changed

+46
-19
lines changed

llvm/lib/Analysis/Loads.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -631,9 +631,13 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
631631
if (!Val || !Len)
632632
return nullptr;
633633

634-
// TODO: Handle offsets.
635-
Value *Dst = MSI->getDest();
636-
if (!AreEquivalentAddressValues(Dst, Ptr))
634+
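// Split both addresses into base + constant byte offset; only a load on the same base that starts at or after the memset destination can be forwarded.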
635+
int64_t StoreOffset = 0, LoadOffset = 0;
636+
const Value *StoreBase =
637+
GetPointerBaseWithConstantOffset(MSI->getDest(), StoreOffset, DL);
638+
const Value *LoadBase =
639+
GetPointerBaseWithConstantOffset(Ptr, LoadOffset, DL);
640+
if (StoreBase != LoadBase || LoadOffset < StoreOffset)
637641
return nullptr;
638642

639643
if (IsLoadCSE)
@@ -645,7 +649,7 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
645649

646650
// Make sure the read bytes are contained in the memset.
647651
uint64_t LoadSize = LoadTypeSize.getFixedValue();
648-
if ((Len->getValue() * 8).ult(LoadSize))
652+
if ((Len->getValue() * 8).ult(LoadSize + (LoadOffset - StoreOffset) * 8))
649653
return nullptr;
650654

651655
APInt Splat = LoadSize >= 8 ? APInt::getSplat(LoadSize, Val->getValue())

llvm/test/Analysis/GlobalsModRef/memset-escape.ll

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,14 @@ target triple = "x86_64-apple-macosx10.10.0"
77
@a = internal global [3 x i32] zeroinitializer, align 4
88
@b = common global i32 0, align 4
99

10-
; The important thing we're checking for here is the reload of (some element of)
11-
; @a after the memset.
10+
; The important thing we're checking here is that the value from the memset
11+
; rather than the preceding store is forwarded.
1212

1313
define i32 @main() {
1414
; CHECK-LABEL: define noundef i32 @main(
1515
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
1616
; CHECK-NEXT: [[ENTRY:.*:]]
17-
; CHECK-NEXT: store i32 1, ptr getelementptr inbounds nuw (i8, ptr @a, i64 8), align 4
18-
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(12) @a, i8 0, i64 12, i1 false)
1917
; CHECK-NEXT: store i32 3, ptr @b, align 4
20-
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @a, i64 8), align 4
21-
; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
22-
; CHECK-NEXT: br i1 [[CMP1_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
23-
; CHECK: [[IF_THEN]]:
24-
; CHECK-NEXT: tail call void @abort()
25-
; CHECK-NEXT: unreachable
26-
; CHECK: [[IF_END]]:
2718
; CHECK-NEXT: ret i32 0
2819
;
2920
entry:

llvm/test/Transforms/InstCombine/load-store-forward.ll

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -365,20 +365,52 @@ define i32 @load_after_memset_unknown(ptr %a, i8 %byte) {
365365
ret i32 %v
366366
}
367367

368-
; TODO: Handle load at offset.
369368
define i32 @load_after_memset_0_offset(ptr %a) {
370369
; CHECK-LABEL: @load_after_memset_0_offset(
371370
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
372-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
373-
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4
374-
; CHECK-NEXT: ret i32 [[V]]
371+
; CHECK-NEXT: ret i32 0
375372
;
376373
call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
377374
%gep = getelementptr i8, ptr %a, i64 4
378375
%v = load i32, ptr %gep
379376
ret i32 %v
380377
}
381378

379+
define i32 @load_after_memset_1_offset(ptr %a) {
380+
; CHECK-LABEL: @load_after_memset_1_offset(
381+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
382+
; CHECK-NEXT: ret i32 16843009
383+
;
384+
call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
385+
%gep = getelementptr i8, ptr %a, i64 4
386+
%v = load i32, ptr %gep
387+
ret i32 %v
388+
}
389+
390+
define i1 @load_after_memset_0_offset_i1(ptr %a) {
391+
; CHECK-LABEL: @load_after_memset_0_offset_i1(
392+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
393+
; CHECK-NEXT: ret i1 false
394+
;
395+
call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
396+
%gep = getelementptr i1, ptr %a, i64 12
397+
%v = load i1, ptr %gep
398+
ret i1 %v
399+
}
400+
401+
define i8 @neg_load_after_memset_0_neg_offset(ptr %a) {
402+
; CHECK-LABEL: @neg_load_after_memset_0_neg_offset(
403+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 2
404+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[GEP]], i8 0, i64 16, i1 false)
405+
; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[A]], align 1
406+
; CHECK-NEXT: ret i8 [[V]]
407+
;
408+
%gep = getelementptr i8, ptr %a, i64 2
409+
call void @llvm.memset.p0.i64(ptr %gep, i8 0, i64 16, i1 false)
410+
%v = load i8, ptr %a
411+
ret i8 %v
412+
}
413+
382414
define i32 @load_after_memset_0_offset_too_large(ptr %a) {
383415
; CHECK-LABEL: @load_after_memset_0_offset_too_large(
384416
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)

0 commit comments

Comments
 (0)