Skip to content

Commit a091f70

Browse files
committed
[CostModel][X86] Improve add vXi64 + fadd vXf64 reduction tests for SLM
As noted on D59710 we weren't handling the high costs of these operations on SLM.
1 parent 3ddac7e commit a091f70

File tree

3 files changed

+38
-12
lines changed

3 files changed

+38
-12
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2534,6 +2534,11 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
25342534
// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
25352535
// and make it as the cost.
25362536

2537+
static const CostTblEntry SLMCostTblPairWise[] = {
2538+
{ ISD::FADD, MVT::v2f64, 3 },
2539+
{ ISD::ADD, MVT::v2i64, 5 },
2540+
};
2541+
25372542
static const CostTblEntry SSE2CostTblPairWise[] = {
25382543
{ ISD::FADD, MVT::v2f64, 2 },
25392544
{ ISD::FADD, MVT::v4f32, 4 },
@@ -2559,6 +2564,11 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
25592564
{ ISD::ADD, MVT::v32i8, 4 },
25602565
};
25612566

2567+
static const CostTblEntry SLMCostTblNoPairWise[] = {
2568+
{ ISD::FADD, MVT::v2f64, 3 },
2569+
{ ISD::ADD, MVT::v2i64, 5 },
2570+
};
2571+
25622572
static const CostTblEntry SSE2CostTblNoPairWise[] = {
25632573
{ ISD::FADD, MVT::v2f64, 2 },
25642574
{ ISD::FADD, MVT::v4f32, 4 },
@@ -2595,6 +2605,10 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
25952605
if (VT.isSimple()) {
25962606
MVT MTy = VT.getSimpleVT();
25972607
if (IsPairwise) {
2608+
if (ST->isSLM())
2609+
if (const auto *Entry = CostTableLookup(SLMCostTblPairWise, ISD, MTy))
2610+
return Entry->Cost;
2611+
25982612
if (ST->hasAVX())
25992613
if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
26002614
return Entry->Cost;
@@ -2603,6 +2617,10 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
26032617
if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
26042618
return Entry->Cost;
26052619
} else {
2620+
if (ST->isSLM())
2621+
if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
2622+
return Entry->Cost;
2623+
26062624
if (ST->hasAVX())
26072625
if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
26082626
return Entry->Cost;
@@ -2618,6 +2636,10 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
26182636
MVT MTy = LT.second;
26192637

26202638
if (IsPairwise) {
2639+
if (ST->isSLM())
2640+
if (const auto *Entry = CostTableLookup(SLMCostTblPairWise, ISD, MTy))
2641+
return LT.first * Entry->Cost;
2642+
26212643
if (ST->hasAVX())
26222644
if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
26232645
return LT.first * Entry->Cost;
@@ -2626,6 +2648,10 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
26262648
if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
26272649
return LT.first * Entry->Cost;
26282650
} else {
2651+
if (ST->isSLM())
2652+
if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
2653+
return LT.first * Entry->Cost;
2654+
26292655
if (ST->hasAVX())
26302656
if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
26312657
return LT.first * Entry->Cost;

llvm/test/Analysis/CostModel/X86/reduce-add.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@ define i32 @reduce_i64(i32 %arg) {
3737
;
3838
; SLM-LABEL: 'reduce_i64'
3939
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
40-
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
41-
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
42-
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
43-
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
40+
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
41+
; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
42+
; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
43+
; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
4444
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
4545
;
4646
%V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)

llvm/test/Analysis/CostModel/X86/reduction.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ define fastcc double @no_pairwise_reduction2double(<2 x double> %rdx, double %f1
356356
; SLM-LABEL: 'no_pairwise_reduction2double'
357357
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
358358
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf
359-
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0
359+
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0
360360
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
361361
;
362362
%rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
@@ -462,7 +462,7 @@ define fastcc double @no_pairwise_reduction4double(<4 x double> %rdx, double %f1
462462
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
463463
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
464464
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
465-
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
465+
; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
466466
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
467467
;
468468
%rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
@@ -562,7 +562,7 @@ define fastcc i64 @no_pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) {
562562
; SLM-LABEL: 'no_pairwise_reduction2i64'
563563
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
564564
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf
565-
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0
565+
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0
566566
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
567567
;
568568
%rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
@@ -628,7 +628,7 @@ define fastcc i64 @no_pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) {
628628
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bin.rdx = add <4 x i64> %rdx, %rdx.shuf
629629
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
630630
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7
631-
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
631+
; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
632632
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
633633
;
634634
%rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
@@ -787,7 +787,7 @@ define fastcc double @pairwise_reduction2double(<2 x double> %rdx, double %f1) {
787787
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
788788
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
789789
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
790-
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0
790+
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0
791791
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
792792
;
793793
%rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
@@ -918,7 +918,7 @@ define fastcc double @pairwise_reduction4double(<4 x double> %rdx, double %f1) {
918918
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
919919
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
920920
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
921-
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
921+
; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
922922
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
923923
;
924924
%rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -1044,7 +1044,7 @@ define fastcc i64 @pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) {
10441044
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 0, i32 undef>
10451045
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
10461046
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
1047-
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0
1047+
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0
10481048
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
10491049
;
10501050
%rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 0, i32 undef>
@@ -1125,7 +1125,7 @@ define fastcc i64 @pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) {
11251125
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
11261126
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
11271127
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
1128-
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
1128+
; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
11291129
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
11301130
;
11311131
%rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>

0 commit comments

Comments
 (0)