Skip to content

Commit 55f9ecc

Browse files
authored
[LV] Revert back to use Loop::isLoopInvariant in isPredicatedInst. (#150828)
This partially reverts #140744, restoring the original TheLoop->isLoopInvariant check instead the more powerful Legal->isInvariant, which uses SCEV. This causes a mis-compile, because SCEV can prove that the stored value is loop-invariant, which in turn converts the store to a uniform store. But in VPlan, we aren't yet able to determine that the stored value is loop-invariant, so we extract the last lane, which is incorrect, because it does not account for the mask of the store. Restoring the original code is a safe fix and avoids this subtle divergence. Fixes #149347. PR: #150828
1 parent f3c531c commit 55f9ecc

File tree

2 files changed

+48
-4
lines changed

2 files changed

+48
-4
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3022,7 +3022,7 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
30223022
// is correct. The easiest form of the later is to require that all values
30233023
// stored are the same.
30243024
return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
3025-
Legal->isInvariant(cast<StoreInst>(I)->getValueOperand()));
3025+
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
30263026
}
30273027
case Instruction::UDiv:
30283028
case Instruction::SDiv:

llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,42 @@ define void @loop_invariant_store(ptr %p, i64 %a, i8 %b) {
1717
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
1818
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1919
; CHECK: [[VECTOR_BODY]]:
20-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
21-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
20+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE8:.*]] ]
21+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE8]] ]
2222
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 8)
2323
; CHECK-NEXT: [[TMP5:%.*]] = icmp sge <4 x i32> [[VEC_IND]], splat (i32 2)
2424
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
2525
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
2626
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
2727
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
28+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
29+
; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
30+
; CHECK: [[PRED_STORE_IF]]:
31+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
32+
; CHECK-NEXT: store i8 [[TMP17]], ptr [[P]], align 1
33+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
34+
; CHECK: [[PRED_STORE_CONTINUE]]:
35+
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
36+
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
37+
; CHECK: [[PRED_STORE_IF3]]:
38+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
39+
; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
40+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
41+
; CHECK: [[PRED_STORE_CONTINUE4]]:
42+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
43+
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
44+
; CHECK: [[PRED_STORE_IF5]]:
45+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
46+
; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
47+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
48+
; CHECK: [[PRED_STORE_CONTINUE6]]:
49+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
50+
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8]]
51+
; CHECK: [[PRED_STORE_IF7]]:
2852
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
2953
; CHECK-NEXT: store i8 [[TMP9]], ptr [[P]], align 1
54+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
55+
; CHECK: [[PRED_STORE_CONTINUE8]]:
3056
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3157
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
3258
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
@@ -263,7 +289,6 @@ exit: ; preds = %loop.latch
263289
}
264290

265291
; Test case for https://github.com/llvm/llvm-project/issues/149347.
266-
; FIXME: Currently mis-compiles.
267292
define void @test_store_to_invariant_address_needs_mask_due_to_low_trip_count(ptr %dst) {
268293
; CHECK-LABEL: define void @test_store_to_invariant_address_needs_mask_due_to_low_trip_count(
269294
; CHECK-SAME: ptr [[DST:%.*]]) {
@@ -272,7 +297,26 @@ define void @test_store_to_invariant_address_needs_mask_due_to_low_trip_count(pt
272297
; CHECK: [[VECTOR_PH]]:
273298
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
274299
; CHECK: [[VECTOR_BODY]]:
300+
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
301+
; CHECK: [[PRED_STORE_IF]]:
302+
; CHECK-NEXT: store i32 1, ptr [[DST]], align 4
303+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
304+
; CHECK: [[PRED_STORE_CONTINUE]]:
305+
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
306+
; CHECK: [[PRED_STORE_IF1]]:
307+
; CHECK-NEXT: store i32 1, ptr [[DST]], align 4
308+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
309+
; CHECK: [[PRED_STORE_CONTINUE2]]:
310+
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
311+
; CHECK: [[PRED_STORE_IF3]]:
312+
; CHECK-NEXT: store i32 1, ptr [[DST]], align 4
313+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
314+
; CHECK: [[PRED_STORE_CONTINUE4]]:
315+
; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
316+
; CHECK: [[PRED_STORE_IF5]]:
275317
; CHECK-NEXT: store i32 0, ptr [[DST]], align 4
318+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
319+
; CHECK: [[PRED_STORE_CONTINUE6]]:
276320
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
277321
; CHECK: [[MIDDLE_BLOCK]]:
278322
; CHECK-NEXT: br label %[[EXIT:.*]]

0 commit comments

Comments
 (0)