Skip to content

Commit 86916ff

Browse files
authored
[LV] Fix gap mask requirement for interleaved access (#151105)
When interleaved stores contain gaps, a mask is required to skip the gaps, regardless of whether scalar epilogues are allowed. This patch corrects the condition under which a gap mask is needed, ensuring consistency between the legacy and VPlan-based cost models and avoiding assertion failures. Related #149981
1 parent f890591 commit 86916ff

File tree

3 files changed

+63
-1
lines changed

3 files changed

+63
-1
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3548,6 +3548,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
35483548
// Vectorize the interleaved store group.
35493549
Value *MaskForGaps =
35503550
createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3551+
assert(((MaskForGaps != nullptr) == NeedsMaskForGaps) &&
3552+
"Mismatch between NeedsMaskForGaps and MaskForGaps");
35513553
assert((!MaskForGaps || !State.VF.isScalable()) &&
35523554
"masking gaps for scalable vectors is not yet supported.");
35533555
ArrayRef<VPValue *> StoredValues = getStoredValues();

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2563,7 +2563,8 @@ void VPlanTransforms::createInterleaveGroups(
25632563
}
25642564

25652565
bool NeedsMaskForGaps =
2566-
IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed;
2566+
(IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
2567+
(!StoredValues.empty() && !IG->isFull());
25672568

25682569
Instruction *IRInsertPos = IG->getInsertPos();
25692570
auto *InsertPos =
Lines changed: 59 additions & 0 deletions (new regression test file; its path was lost in extraction — presumably under llvm/test/Transforms/LoopVectorize/, TODO confirm)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
2+
; RUN: opt -mtriple=riscv64 -mattr=+v -passes=loop-vectorize \
3+
; RUN: -scalable-vectorization=off -enable-masked-interleaved-mem-accesses \
4+
; RUN: -force-vector-interleave=1 -riscv-v-vector-bits-min=1024 -S < %s | FileCheck %s
5+
6+
; Regression test for the gap-mask fix: the scalar loop below stores only to
; a[2*i] (interleave factor 2 with a gap in the second member), so the
; vectorizer must emit a gap mask for the interleaved store — note the
; alternating <32 x i1> true/false mask on the llvm.masked.store CHECK line.
; Per the commit message, this is required regardless of whether a scalar
; epilogue is allowed, keeping the legacy and VPlan cost models consistent.
define void @store_factor_2_with_tail_gap(i64 %n, ptr %a) {
7+
; CHECK-LABEL: define void @store_factor_2_with_tail_gap(
8+
; CHECK-SAME: i64 [[N:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
9+
; CHECK-NEXT: [[ENTRY:.*]]:
10+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
11+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
12+
; CHECK: [[VECTOR_PH]]:
13+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
14+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
15+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
16+
; CHECK: [[VECTOR_BODY]]:
17+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
18+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
19+
; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
20+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
21+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i64> [[VEC_IND]], <16 x i64> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
22+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
23+
; The key assertion: the interleaved store is masked, with every odd lane
; (the gap member of the factor-2 group) disabled.
; CHECK-NEXT: call void @llvm.masked.store.v32i64.p0(<32 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], i32 8, <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>)
24+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
25+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
26+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
27+
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
28+
; CHECK: [[MIDDLE_BLOCK]]:
29+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
30+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
31+
; CHECK: [[SCALAR_PH]]:
32+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
33+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
34+
; CHECK: [[FOR_BODY]]:
35+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
36+
; CHECK-NEXT: [[TMP4:%.*]] = shl nsw i64 [[INDVARS_IV]], 1
37+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
38+
; CHECK-NEXT: store i64 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 8
39+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
40+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
41+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
42+
; CHECK: [[EXIT]]:
43+
; CHECK-NEXT: ret void
44+
;
45+
; Input: a simple counted loop storing i to a[2*i] — stride-2 stores with
; the odd elements of %a never written (the "tail gap").
entry:
46+
br label %for.body
47+
48+
for.body:
49+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
50+
%0 = shl nsw i64 %iv, 1
51+
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %0
52+
store i64 %iv, ptr %arrayidx, align 8
53+
%iv.next = add nuw nsw i64 %iv, 1
54+
%exitcond.not = icmp eq i64 %iv.next, %n
55+
br i1 %exitcond.not, label %exit, label %for.body
56+
57+
exit:
58+
ret void
59+
}

0 commit comments

Comments (0)