Skip to content

Commit 1406058

Browse files
[LLVM][InstCombine] Extend masked_gather's demanded elt analysis. (#151732)
Add support for other Constant types for the mask operand.
1 parent 04f9888 commit 1406058

File tree

2 files changed

+10
-6
lines changed

2 files changed

+10
-6
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1834,14 +1834,17 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
18341834
// segfaults which didn't exist in the original program.
18351835
APInt DemandedPtrs(APInt::getAllOnes(VWidth)),
18361836
DemandedPassThrough(DemandedElts);
1837-
if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
1837+
if (auto *CMask = dyn_cast<Constant>(II->getOperand(2))) {
18381838
for (unsigned i = 0; i < VWidth; i++) {
1839-
Constant *CElt = CV->getAggregateElement(i);
1840-
if (CElt->isNullValue())
1841-
DemandedPtrs.clearBit(i);
1842-
else if (CElt->isAllOnesValue())
1843-
DemandedPassThrough.clearBit(i);
1839+
if (Constant *CElt = CMask->getAggregateElement(i)) {
1840+
if (CElt->isNullValue())
1841+
DemandedPtrs.clearBit(i);
1842+
else if (CElt->isAllOnesValue())
1843+
DemandedPassThrough.clearBit(i);
1844+
}
18441845
}
1846+
}
1847+
18451848
if (II->getIntrinsicID() == Intrinsic::masked_gather)
18461849
simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
18471850
simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3);

llvm/test/Transforms/InstCombine/masked_intrinsics.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
3+
; RUN: opt -passes=instcombine -use-constant-int-for-fixed-length-splat -S < %s | FileCheck %s
34

45
declare <2 x double> @llvm.masked.load.v2f64.p0(ptr %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
56
declare void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %ptrs, i32, <2 x i1> %mask)

0 commit comments

Comments (0)