You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
This is similar to what I recently did for getArithmeticReductionCost.
I'm trying to account for the narrowing from 512->256->128 as we go.
I've also added a new helper method getMinMaxCost that tries to
handle the cases where we have native min/max instructions and
fall back to cmp+select when we don't.
Differential Revision: https://reviews.llvm.org/D76634
Copy file name to clipboardExpand all lines: llvm/test/Analysis/CostModel/X86/reduce-fmax.ll
+30-48Lines changed: 30 additions & 48 deletions
Original file line number
Diff line number
Diff line change
@@ -12,26 +12,26 @@
12
12
definei32@reduce_f64(i32%arg) {
13
13
; SSE-LABEL: 'reduce_f64'
14
14
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
15
-
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
16
-
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
17
-
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
18
-
; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
15
+
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
16
+
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
17
+
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
18
+
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
19
19
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
20
20
;
21
21
; AVX-LABEL: 'reduce_f64'
22
22
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
23
-
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
24
-
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
25
-
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
26
-
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
23
+
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
24
+
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
25
+
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
26
+
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
27
27
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
28
28
;
29
29
; AVX512-LABEL: 'reduce_f64'
30
30
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
31
-
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
32
-
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
33
-
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
34
-
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
31
+
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> undef)
32
+
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> undef)
33
+
; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> undef)
34
+
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> undef)
35
35
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
36
36
;
37
37
%V1 = calldouble@llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
48
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
49
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
50
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
51
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
52
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
53
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
54
-
;
55
-
; SSSE3-LABEL: 'reduce_f32'
56
-
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
57
-
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
58
-
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
59
-
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
60
-
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
61
-
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
62
-
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
63
-
;
64
-
; SSE4-LABEL: 'reduce_f32'
65
-
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
66
-
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
67
-
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
68
-
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
69
-
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
70
-
; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
71
-
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
46
+
; SSE-LABEL: 'reduce_f32'
47
+
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
48
+
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
49
+
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
50
+
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
51
+
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
52
+
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
53
+
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
72
54
;
73
55
; AVX-LABEL: 'reduce_f32'
74
56
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
75
-
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
76
-
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
77
-
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
78
-
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
79
-
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
57
+
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
58
+
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
59
+
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
60
+
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
61
+
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
80
62
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
81
63
;
82
64
; AVX512-LABEL: 'reduce_f32'
83
65
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
84
-
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
85
-
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
86
-
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
87
-
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
88
-
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
66
+
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
67
+
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
68
+
; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
69
+
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
70
+
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
89
71
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
90
72
;
91
73
%V1 = callfloat@llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
0 commit comments