@@ -14,15 +14,14 @@ target triple = "aarch64"
14
14
define %"class.std::complex" @complex_mul_v2f64 (ptr %a , ptr %b ) {
15
15
; CHECK-LABEL: complex_mul_v2f64:
16
16
; CHECK: // %bb.0: // %entry
17
+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
17
18
; CHECK-NEXT: movi v1.2d, #0000000000000000
18
19
; CHECK-NEXT: cntd x8
19
- ; CHECK-NEXT: mov w10, #100 // =0x64
20
20
; CHECK-NEXT: neg x9, x8
21
+ ; CHECK-NEXT: mov w10, #100 // =0x64
21
22
; CHECK-NEXT: ptrue p0.d
22
23
; CHECK-NEXT: and x9, x9, x10
23
24
; CHECK-NEXT: rdvl x10, #2
24
- ; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
25
- ; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
26
25
; CHECK-NEXT: .LBB0_1: // %vector.body
27
26
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
28
27
; CHECK-NEXT: ldr z2, [x0, #1, mul vl]
@@ -32,14 +31,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
32
31
; CHECK-NEXT: ldr z5, [x1]
33
32
; CHECK-NEXT: add x1, x1, x10
34
33
; CHECK-NEXT: add x0, x0, x10
35
- ; CHECK-NEXT: fcmla z1 .d, p0/m, z5.d, z3.d, #0
36
- ; CHECK-NEXT: fcmla z0 .d, p0/m, z4.d, z2.d, #0
37
- ; CHECK-NEXT: fcmla z1 .d, p0/m, z5.d, z3.d, #90
38
- ; CHECK-NEXT: fcmla z0 .d, p0/m, z4.d, z2.d, #90
34
+ ; CHECK-NEXT: fcmla z0 .d, p0/m, z5.d, z3.d, #0
35
+ ; CHECK-NEXT: fcmla z1 .d, p0/m, z4.d, z2.d, #0
36
+ ; CHECK-NEXT: fcmla z0 .d, p0/m, z5.d, z3.d, #90
37
+ ; CHECK-NEXT: fcmla z1 .d, p0/m, z4.d, z2.d, #90
39
38
; CHECK-NEXT: b.ne .LBB0_1
40
39
; CHECK-NEXT: // %bb.2: // %exit.block
41
- ; CHECK-NEXT: uzp1 z2.d, z1 .d, z0 .d
42
- ; CHECK-NEXT: uzp2 z1.d, z1 .d, z0 .d
40
+ ; CHECK-NEXT: uzp1 z2.d, z0 .d, z1 .d
41
+ ; CHECK-NEXT: uzp2 z1.d, z0 .d, z1 .d
43
42
; CHECK-NEXT: faddv d0, p0, z2.d
44
43
; CHECK-NEXT: faddv d1, p0, z1.d
45
44
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -183,17 +182,16 @@ exit.block: ; preds = %vector.body
183
182
define %"class.std::complex" @complex_mul_v2f64_unrolled (ptr %a , ptr %b ) {
184
183
; CHECK-LABEL: complex_mul_v2f64_unrolled:
185
184
; CHECK: // %bb.0: // %entry
185
+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
186
186
; CHECK-NEXT: movi v1.2d, #0000000000000000
187
187
; CHECK-NEXT: cntw x8
188
- ; CHECK-NEXT: mov w10, #1000 // =0x3e8
188
+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
189
+ ; CHECK-NEXT: movi v3.2d, #0000000000000000
189
190
; CHECK-NEXT: neg x9, x8
191
+ ; CHECK-NEXT: mov w10, #1000 // =0x3e8
190
192
; CHECK-NEXT: ptrue p0.d
191
193
; CHECK-NEXT: and x9, x9, x10
192
194
; CHECK-NEXT: rdvl x10, #4
193
- ; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
194
- ; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
195
- ; CHECK-NEXT: mov z2.d, z1.d
196
- ; CHECK-NEXT: mov z3.d, z0.d
197
195
; CHECK-NEXT: .LBB2_1: // %vector.body
198
196
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
199
197
; CHECK-NEXT: ldr z4, [x0, #1, mul vl]
@@ -207,20 +205,20 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
207
205
; CHECK-NEXT: ldr z18, [x1, #3, mul vl]
208
206
; CHECK-NEXT: ldr z19, [x1, #2, mul vl]
209
207
; CHECK-NEXT: add x1, x1, x10
210
- ; CHECK-NEXT: fcmla z1 .d, p0/m, z16.d, z5.d, #0
211
- ; CHECK-NEXT: fcmla z0 .d, p0/m, z7.d, z4.d, #0
208
+ ; CHECK-NEXT: fcmla z0 .d, p0/m, z16.d, z5.d, #0
209
+ ; CHECK-NEXT: fcmla z1 .d, p0/m, z7.d, z4.d, #0
212
210
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #0
213
211
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #0
214
- ; CHECK-NEXT: fcmla z1 .d, p0/m, z16.d, z5.d, #90
215
- ; CHECK-NEXT: fcmla z0 .d, p0/m, z7.d, z4.d, #90
212
+ ; CHECK-NEXT: fcmla z0 .d, p0/m, z16.d, z5.d, #90
213
+ ; CHECK-NEXT: fcmla z1 .d, p0/m, z7.d, z4.d, #90
216
214
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #90
217
215
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #90
218
216
; CHECK-NEXT: b.ne .LBB2_1
219
217
; CHECK-NEXT: // %bb.2: // %exit.block
220
218
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
221
- ; CHECK-NEXT: uzp1 z5.d, z1 .d, z0 .d
219
+ ; CHECK-NEXT: uzp1 z5.d, z0 .d, z1 .d
222
220
; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
223
- ; CHECK-NEXT: uzp2 z0.d, z1 .d, z0 .d
221
+ ; CHECK-NEXT: uzp2 z0.d, z0 .d, z1 .d
224
222
; CHECK-NEXT: fadd z1.d, z4.d, z5.d
225
223
; CHECK-NEXT: fadd z2.d, z2.d, z0.d
226
224
; CHECK-NEXT: faddv d0, p0, z1.d
@@ -310,15 +308,15 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
310
308
; CHECK-LABEL: reduction_mix:
311
309
; CHECK: // %bb.0: // %entry
312
310
; CHECK-NEXT: movi v2.2d, #0000000000000000
311
+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
313
312
; CHECK-NEXT: cntd x9
314
- ; CHECK-NEXT: mov w11 , #100 // =0x64
313
+ ; CHECK-NEXT: movi v1.2d , #0000000000000000
315
314
; CHECK-NEXT: neg x10, x9
315
+ ; CHECK-NEXT: mov w11, #100 // =0x64
316
316
; CHECK-NEXT: ptrue p0.d
317
317
; CHECK-NEXT: mov x8, xzr
318
318
; CHECK-NEXT: and x10, x10, x11
319
319
; CHECK-NEXT: rdvl x11, #2
320
- ; CHECK-NEXT: zip2 z0.d, z2.d, z2.d
321
- ; CHECK-NEXT: zip1 z1.d, z2.d, z2.d
322
320
; CHECK-NEXT: .LBB3_1: // %vector.body
323
321
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
324
322
; CHECK-NEXT: ldr z3, [x0]
@@ -327,13 +325,13 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
327
325
; CHECK-NEXT: ld1w { z5.d }, p0/z, [x3, x8, lsl #2]
328
326
; CHECK-NEXT: add x8, x8, x9
329
327
; CHECK-NEXT: cmp x10, x8
330
- ; CHECK-NEXT: fadd z0 .d, z4.d, z0 .d
331
- ; CHECK-NEXT: fadd z1 .d, z3.d, z1 .d
328
+ ; CHECK-NEXT: fadd z1 .d, z4.d, z1 .d
329
+ ; CHECK-NEXT: fadd z0 .d, z3.d, z0 .d
332
330
; CHECK-NEXT: add z2.d, z5.d, z2.d
333
331
; CHECK-NEXT: b.ne .LBB3_1
334
332
; CHECK-NEXT: // %bb.2: // %middle.block
335
- ; CHECK-NEXT: uzp2 z3.d, z1 .d, z0 .d
336
- ; CHECK-NEXT: uzp1 z1.d, z1 .d, z0 .d
333
+ ; CHECK-NEXT: uzp2 z3.d, z0 .d, z1 .d
334
+ ; CHECK-NEXT: uzp1 z1.d, z0 .d, z1 .d
337
335
; CHECK-NEXT: uaddv d2, p0, z2.d
338
336
; CHECK-NEXT: faddv d0, p0, z3.d
339
337
; CHECK-NEXT: faddv d1, p0, z1.d
0 commit comments