@@ -17,16 +17,42 @@ define void @loop_invariant_store(ptr %p, i64 %a, i8 %b) {
17
17
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
18
18
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
19
19
; CHECK: [[VECTOR_BODY]]:
20
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
21
- ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
20
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE8:.*]] ]
21
+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE8]] ]
22
22
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 8)
23
23
; CHECK-NEXT: [[TMP5:%.*]] = icmp sge <4 x i32> [[VEC_IND]], splat (i32 2)
24
24
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
25
25
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
26
26
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
27
27
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
28
+ ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
29
+ ; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
30
+ ; CHECK: [[PRED_STORE_IF]]:
31
+ ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
32
+ ; CHECK-NEXT: store i8 [[TMP17]], ptr [[P]], align 1
33
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
34
+ ; CHECK: [[PRED_STORE_CONTINUE]]:
35
+ ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
36
+ ; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
37
+ ; CHECK: [[PRED_STORE_IF3]]:
38
+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
39
+ ; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
40
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
41
+ ; CHECK: [[PRED_STORE_CONTINUE4]]:
42
+ ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
43
+ ; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
44
+ ; CHECK: [[PRED_STORE_IF5]]:
45
+ ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
46
+ ; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
47
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
48
+ ; CHECK: [[PRED_STORE_CONTINUE6]]:
49
+ ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
50
+ ; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8]]
51
+ ; CHECK: [[PRED_STORE_IF7]]:
28
52
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
29
53
; CHECK-NEXT: store i8 [[TMP9]], ptr [[P]], align 1
54
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
55
+ ; CHECK: [[PRED_STORE_CONTINUE8]]:
30
56
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
31
57
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
32
58
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
@@ -263,7 +289,6 @@ exit: ; preds = %loop.latch
263
289
}
264
290
265
291
; Test case for https://github.com/llvm/llvm-project/issues/149347.
266
- ; FIXME: Currently mis-compiles.
267
292
define void @test_store_to_invariant_address_needs_mask_due_to_low_trip_count(ptr %dst) {
268
293
; CHECK-LABEL: define void @test_store_to_invariant_address_needs_mask_due_to_low_trip_count(
269
294
; CHECK-SAME: ptr [[DST:%.*]]) {
@@ -272,7 +297,26 @@ define void @test_store_to_invariant_address_needs_mask_due_to_low_trip_count(pt
272
297
; CHECK: [[VECTOR_PH]]:
273
298
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
274
299
; CHECK: [[VECTOR_BODY]]:
300
+ ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
301
+ ; CHECK: [[PRED_STORE_IF]]:
302
+ ; CHECK-NEXT: store i32 1, ptr [[DST]], align 4
303
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
304
+ ; CHECK: [[PRED_STORE_CONTINUE]]:
305
+ ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
306
+ ; CHECK: [[PRED_STORE_IF1]]:
307
+ ; CHECK-NEXT: store i32 1, ptr [[DST]], align 4
308
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
309
+ ; CHECK: [[PRED_STORE_CONTINUE2]]:
310
+ ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
311
+ ; CHECK: [[PRED_STORE_IF3]]:
312
+ ; CHECK-NEXT: store i32 1, ptr [[DST]], align 4
313
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
314
+ ; CHECK: [[PRED_STORE_CONTINUE4]]:
315
+ ; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
316
+ ; CHECK: [[PRED_STORE_IF5]]:
275
317
; CHECK-NEXT: store i32 0, ptr [[DST]], align 4
318
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
319
+ ; CHECK: [[PRED_STORE_CONTINUE6]]:
276
320
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
277
321
; CHECK: [[MIDDLE_BLOCK]]:
278
322
; CHECK-NEXT: br label %[[EXIT:.*]]
0 commit comments