; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s
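; The loop below keeps six <8 x double> llvm.fmuladd accumulators live across
; iterations; the autogenerated CHECK lines pin down the AVX-512 vfmadd
; sequence chosen for them.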

define void @eggs(<8 x double>* %arg, <8 x double>* %arg1, <8 x double>* %arg2, <8 x double>* %arg3, <8 x double>* %arg4, <8 x double>* %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, double* %arg13, double* %arg14) nounwind {
; CHECK-LABEL: eggs:
; CHECK: ## %bb.0: ## %bb
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r11
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r12
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r15
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r14
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; CHECK-NEXT: leaq (%rbx,%r14,8), %r14
; CHECK-NEXT: leaq (%rbx,%r15,8), %r15
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r13
; CHECK-NEXT: addq %r12, %r13
; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %r12
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vxorpd %xmm5, %xmm5, %xmm5
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_1: ## %bb15
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovapd %zmm5, %zmm6
; CHECK-NEXT: vmovapd %zmm4, %zmm7
; CHECK-NEXT: vmovupd (%rax,%r11,8), %zmm4
; CHECK-NEXT: vmovupd (%rax,%r13,8), %zmm5
; CHECK-NEXT: vmovupd (%rax,%r12,8), %zmm8
; CHECK-NEXT: vbroadcastsd (%r15,%rbx,8), %zmm9
; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm0 = (zmm4 * zmm9) + zmm0
; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm1 = (zmm5 * zmm9) + zmm1
; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm2 = (zmm8 * zmm9) + zmm2
; CHECK-NEXT: vbroadcastsd (%r14,%rbx,8), %zmm9
; CHECK-NEXT: vfmadd213pd {{.*#+}} zmm4 = (zmm9 * zmm4) + zmm7
; CHECK-NEXT: vfmadd213pd {{.*#+}} zmm5 = (zmm9 * zmm5) + zmm6
; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm3 = (zmm8 * zmm9) + zmm3
; CHECK-NEXT: incq %rbx
; CHECK-NEXT: cmpq %rbx, %r10
; CHECK-NEXT: jne LBB0_1
; CHECK-NEXT: ## %bb.2: ## %bb51
; CHECK-NEXT: vmovapd %zmm0, (%rdi)
; CHECK-NEXT: vmovapd %zmm1, (%rsi)
; CHECK-NEXT: vmovapd %zmm2, (%rdx)
; CHECK-NEXT: vmovapd %zmm4, (%rcx)
; CHECK-NEXT: vmovapd %zmm5, (%r8)
; CHECK-NEXT: vmovapd %zmm3, (%r9)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
bb:
  br label %bb15

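; Loop body: two scalar broadcasts (%tmp37, %tmp45) each feed three fmuladd
; accumulators over the same three <8 x double> loads.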
bb15:                                             ; preds = %bb15, %bb
  %tmp = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp38, %bb15 ]
  %tmp16 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp39, %bb15 ]
  %tmp17 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp40, %bb15 ]
  %tmp18 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp46, %bb15 ]
  %tmp19 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp47, %bb15 ]
  %tmp20 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp48, %bb15 ]
  %tmp21 = phi i64 [ 0, %bb ], [ %tmp49, %bb15 ]
  %tmp22 = getelementptr inbounds double, double* %arg14, i64 %arg11
  %tmp23 = bitcast double* %tmp22 to <8 x double>*
  %tmp24 = load <8 x double>, <8 x double>* %tmp23, align 8
  %tmp25 = add i64 %arg10, %arg6
  %tmp26 = getelementptr inbounds double, double* %arg14, i64 %tmp25
  %tmp27 = bitcast double* %tmp26 to <8 x double>*
  %tmp28 = load <8 x double>, <8 x double>* %tmp27, align 8
  %tmp29 = add i64 %arg10, %arg7
  %tmp30 = getelementptr inbounds double, double* %arg14, i64 %tmp29
  %tmp31 = bitcast double* %tmp30 to <8 x double>*
  %tmp32 = load <8 x double>, <8 x double>* %tmp31, align 8
  %tmp33 = add i64 %tmp21, %arg8
  %tmp34 = getelementptr inbounds double, double* %arg13, i64 %tmp33
  %tmp35 = load double, double* %tmp34, align 8
  %tmp36 = insertelement <8 x double> undef, double %tmp35, i32 0
  %tmp37 = shufflevector <8 x double> %tmp36, <8 x double> undef, <8 x i32> zeroinitializer
  %tmp38 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp24, <8 x double> %tmp37, <8 x double> %tmp)
  %tmp39 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp28, <8 x double> %tmp37, <8 x double> %tmp16)
  %tmp40 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp32, <8 x double> %tmp37, <8 x double> %tmp17)
  %tmp41 = add i64 %tmp21, %arg9
  %tmp42 = getelementptr inbounds double, double* %arg13, i64 %tmp41
  %tmp43 = load double, double* %tmp42, align 8
  %tmp44 = insertelement <8 x double> undef, double %tmp43, i32 0
  %tmp45 = shufflevector <8 x double> %tmp44, <8 x double> undef, <8 x i32> zeroinitializer
  %tmp46 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp24, <8 x double> %tmp45, <8 x double> %tmp18)
  %tmp47 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp28, <8 x double> %tmp45, <8 x double> %tmp19)
  %tmp48 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp32, <8 x double> %tmp45, <8 x double> %tmp20)
  %tmp49 = add nuw nsw i64 %tmp21, 1
  %tmp50 = icmp eq i64 %tmp49, %arg12
  br i1 %tmp50, label %bb51, label %bb15

bb51:                                             ; preds = %bb15
  store <8 x double> %tmp38, <8 x double>* %arg
  store <8 x double> %tmp39, <8 x double>* %arg1
  store <8 x double> %tmp40, <8 x double>* %arg2
  store <8 x double> %tmp46, <8 x double>* %arg3
  store <8 x double> %tmp47, <8 x double>* %arg4
  store <8 x double> %tmp48, <8 x double>* %arg5
  ret void
}

declare <8 x double> @llvm.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>)