Skip to content

Commit e2cb121

Browse files
committed
[X86] Remove maximum vector length limit from combineBasicSADPattern.
createPSADBW uses SplitOpsAndApply, so it should be able to handle vectors of any size. Restrict the extract result type to i32 or i64, since that's what we have test coverage for today and it probably matches what the isSimple() check gave us before. Differential Revision: https://reviews.llvm.org/D76560
1 parent 9860517 commit e2cb121

File tree

2 files changed

+68
-483
lines changed

2 files changed

+68
-483
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -37809,21 +37809,14 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
3780937809
if (!Subtarget.hasSSE2())
3781037810
return SDValue();
3781137811

37812-
// Verify the type we're extracting from is any integer type above i16.
37813-
EVT VT = Extract->getOperand(0).getValueType();
37814-
if (!VT.isSimple() || !(VT.getVectorElementType().getSizeInBits() > 16))
37812+
EVT ExtractVT = Extract->getValueType(0);
37813+
// Verify the type we're extracting is either i32 or i64.
37814+
// FIXME: Could support other types, but this is what we have coverage for.
37815+
if (ExtractVT != MVT::i32 && ExtractVT != MVT::i64)
3781537816
return SDValue();
3781637817

37817-
unsigned RegSize = 128;
37818-
if (Subtarget.useBWIRegs())
37819-
RegSize = 512;
37820-
else if (Subtarget.hasAVX())
37821-
RegSize = 256;
37822-
37823-
// We handle upto v16i* for SSE2 / v32i* for AVX / v64i* for AVX512.
37824-
// TODO: We should be able to handle larger vectors by splitting them before
37825-
// feeding them into several SADs, and then reducing over those.
37826-
if (RegSize / VT.getVectorNumElements() < 8)
37818+
EVT VT = Extract->getOperand(0).getValueType();
37819+
if (!isPowerOf2_32(VT.getVectorNumElements()))
3782737820
return SDValue();
3782837821

3782937822
// Match shuffle + add pyramid.
@@ -37839,8 +37832,8 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
3783937832
// (extends the sign bit which is zero).
3784037833
// So it is correct to skip the sign/zero extend instruction.
3784137834
if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND ||
37842-
Root.getOpcode() == ISD::ZERO_EXTEND ||
37843-
Root.getOpcode() == ISD::ANY_EXTEND))
37835+
Root.getOpcode() == ISD::ZERO_EXTEND ||
37836+
Root.getOpcode() == ISD::ANY_EXTEND))
3784437837
Root = Root.getOperand(0);
3784537838

3784637839
// If there was a match, we want Root to be a select that is the root of an
@@ -37860,7 +37853,7 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
3786037853
// If the original vector was wider than 8 elements, sum over the results
3786137854
// in the SAD vector.
3786237855
unsigned Stages = Log2_32(VT.getVectorNumElements());
37863-
MVT SadVT = SAD.getSimpleValueType();
37856+
EVT SadVT = SAD.getValueType();
3786437857
if (Stages > 3) {
3786537858
unsigned SadElems = SadVT.getVectorNumElements();
3786637859

@@ -37875,12 +37868,12 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
3787537868
}
3787637869
}
3787737870

37878-
MVT Type = Extract->getSimpleValueType(0);
37879-
unsigned TypeSizeInBits = Type.getSizeInBits();
37880-
// Return the lowest TypeSizeInBits bits.
37881-
MVT ResVT = MVT::getVectorVT(Type, SadVT.getSizeInBits() / TypeSizeInBits);
37871+
unsigned ExtractSizeInBits = ExtractVT.getSizeInBits();
37872+
// Return the lowest ExtractSizeInBits bits.
37873+
EVT ResVT = EVT::getVectorVT(*DAG.getContext(), ExtractVT,
37874+
SadVT.getSizeInBits() / ExtractSizeInBits);
3788237875
SAD = DAG.getBitcast(ResVT, SAD);
37883-
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Type, SAD,
37876+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, SAD,
3788437877
Extract->getOperand(1));
3788537878
}
3788637879

0 commit comments

Comments
 (0)