Commit 76128cf
[X86] Add FMA commuting test case for D75016
This test case shows extra moves due to not fully considering all commuting opportunities.
1 parent 43c8307
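For context, AVX-512 FMA instructions come in three operand orderings (132/213/231) that differ only in which source register doubles as the destination. With the 231 form the addend (the accumulator) is updated in place; if the backend instead commits to the 213 form while the accumulator's register is reused for a load, the old value must be copied aside first. A minimal sketch of the two shapes, in the same AT&T syntax as the CHECK lines below (the registers and memory operand are illustrative, not taken from the test):

  ## 231 form: the accumulator zmm1 is the destination, so it stays in place
  vmovupd (%rax), %zmm2            ## load one multiplicand into a scratch register
  vfmadd231pd %zmm3, %zmm2, %zmm1  ## zmm1 = (zmm2 * zmm3) + zmm1

  ## 213 form with the load coalesced into the accumulator's register
  vmovapd %zmm1, %zmm4             ## extra move to save the old accumulator
  vmovupd (%rax), %zmm1            ## the load clobbers zmm1
  vfmadd213pd %zmm4, %zmm3, %zmm1  ## zmm1 = (zmm3 * zmm1) + zmm4

The back-to-back vmovapd %zmm5, %zmm6 and vmovapd %zmm4, %zmm7 copies at the top of the loop in the CHECK block below are instances of the second shape.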

1 file changed: 116 additions, 0 deletions
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s

define void @eggs(<8 x double>* %arg, <8 x double>* %arg1, <8 x double>* %arg2, <8 x double>* %arg3, <8 x double>* %arg4, <8 x double>* %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, double* %arg13, double* %arg14) nounwind {
; CHECK-LABEL: eggs:
; CHECK: ## %bb.0: ## %bb
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r11
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r12
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r15
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r14
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; CHECK-NEXT: leaq (%rbx,%r14,8), %r14
; CHECK-NEXT: leaq (%rbx,%r15,8), %r15
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r13
; CHECK-NEXT: addq %r12, %r13
; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %r12
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vxorpd %xmm5, %xmm5, %xmm5
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_1: ## %bb15
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovapd %zmm5, %zmm6
; CHECK-NEXT: vmovapd %zmm4, %zmm7
; CHECK-NEXT: vmovupd (%rax,%r11,8), %zmm4
; CHECK-NEXT: vmovupd (%rax,%r13,8), %zmm5
; CHECK-NEXT: vmovupd (%rax,%r12,8), %zmm8
; CHECK-NEXT: vbroadcastsd (%r15,%rbx,8), %zmm9
; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm0 = (zmm4 * zmm9) + zmm0
; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm1 = (zmm5 * zmm9) + zmm1
; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm2 = (zmm8 * zmm9) + zmm2
; CHECK-NEXT: vbroadcastsd (%r14,%rbx,8), %zmm9
; CHECK-NEXT: vfmadd213pd {{.*#+}} zmm4 = (zmm9 * zmm4) + zmm7
; CHECK-NEXT: vfmadd213pd {{.*#+}} zmm5 = (zmm9 * zmm5) + zmm6
; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm3 = (zmm8 * zmm9) + zmm3
; CHECK-NEXT: incq %rbx
; CHECK-NEXT: cmpq %rbx, %r10
; CHECK-NEXT: jne LBB0_1
; CHECK-NEXT: ## %bb.2: ## %bb51
; CHECK-NEXT: vmovapd %zmm0, (%rdi)
; CHECK-NEXT: vmovapd %zmm1, (%rsi)
; CHECK-NEXT: vmovapd %zmm2, (%rdx)
; CHECK-NEXT: vmovapd %zmm4, (%rcx)
; CHECK-NEXT: vmovapd %zmm5, (%r8)
; CHECK-NEXT: vmovapd %zmm3, (%r9)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
  br label %bb15

bb15: ; preds = %bb15, %bb
  %tmp = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp38, %bb15 ]
  %tmp16 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp39, %bb15 ]
  %tmp17 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp40, %bb15 ]
  %tmp18 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp46, %bb15 ]
  %tmp19 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp47, %bb15 ]
  %tmp20 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp48, %bb15 ]
  %tmp21 = phi i64 [ 0, %bb ], [ %tmp49, %bb15 ]
  %tmp22 = getelementptr inbounds double, double* %arg14, i64 %arg11
  %tmp23 = bitcast double* %tmp22 to <8 x double>*
  %tmp24 = load <8 x double>, <8 x double>* %tmp23, align 8
  %tmp25 = add i64 %arg10, %arg6
  %tmp26 = getelementptr inbounds double, double* %arg14, i64 %tmp25
  %tmp27 = bitcast double* %tmp26 to <8 x double>*
  %tmp28 = load <8 x double>, <8 x double>* %tmp27, align 8
  %tmp29 = add i64 %arg10, %arg7
  %tmp30 = getelementptr inbounds double, double* %arg14, i64 %tmp29
  %tmp31 = bitcast double* %tmp30 to <8 x double>*
  %tmp32 = load <8 x double>, <8 x double>* %tmp31, align 8
  %tmp33 = add i64 %tmp21, %arg8
  %tmp34 = getelementptr inbounds double, double* %arg13, i64 %tmp33
  %tmp35 = load double, double* %tmp34, align 8
  %tmp36 = insertelement <8 x double> undef, double %tmp35, i32 0
  %tmp37 = shufflevector <8 x double> %tmp36, <8 x double> undef, <8 x i32> zeroinitializer
  %tmp38 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp24, <8 x double> %tmp37, <8 x double> %tmp)
  %tmp39 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp28, <8 x double> %tmp37, <8 x double> %tmp16)
  %tmp40 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp32, <8 x double> %tmp37, <8 x double> %tmp17)
  %tmp41 = add i64 %tmp21, %arg9
  %tmp42 = getelementptr inbounds double, double* %arg13, i64 %tmp41
  %tmp43 = load double, double* %tmp42, align 8
  %tmp44 = insertelement <8 x double> undef, double %tmp43, i32 0
  %tmp45 = shufflevector <8 x double> %tmp44, <8 x double> undef, <8 x i32> zeroinitializer
  %tmp46 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp24, <8 x double> %tmp45, <8 x double> %tmp18)
  %tmp47 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp28, <8 x double> %tmp45, <8 x double> %tmp19)
  %tmp48 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp32, <8 x double> %tmp45, <8 x double> %tmp20)
  %tmp49 = add nuw nsw i64 %tmp21, 1
  %tmp50 = icmp eq i64 %tmp49, %arg12
  br i1 %tmp50, label %bb51, label %bb15

bb51: ; preds = %bb15
  store <8 x double> %tmp38, <8 x double>* %arg
  store <8 x double> %tmp39, <8 x double>* %arg1
  store <8 x double> %tmp40, <8 x double>* %arg2
  store <8 x double> %tmp46, <8 x double>* %arg3
  store <8 x double> %tmp47, <8 x double>* %arg4
  store <8 x double> %tmp48, <8 x double>* %arg5
  ret void
}

declare <8 x double> @llvm.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>)
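As the NOTE line records, the CHECK assertions above are autogenerated. Once the commuting improvement lands (D75016), they can be refreshed by rerunning utils/update_llc_test_checks.py on this test file with the freshly built llc first on PATH, which should replace the extra vmovapd copies in the expected output.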
