@@ -139,6 +139,87 @@ entry:
139
139
ret i32 %res
140
140
}
141
141
142
+ ; CHECK-LABEL: exchange_multi_use_64_1
; Test input: two pairs of 16-bit multiplies that both reuse the sign-extended
; b[0] and b[1]. The two i32 pair sums are added together in i32, that total
; is sign-extended once to i64, and %acc is added last.
; The FileCheck patterns for this function expect one llvm.arm.smlaldx call
; that takes %acc, followed by one llvm.arm.smlald call that accumulates onto
; the smlaldx result.
143
+ ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
144
+ ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
145
+ ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
146
+ ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
147
+ ; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
148
+ ; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
149
+ ; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
150
+ ; CHECK: [[X:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[LD_A]], i32 [[LD_B]], i64 %acc
151
+ ; CHECK: call i64 @llvm.arm.smlald(i32 [[LD_A_2]], i32 [[LD_B]], i64 [[X]])
152
+ define i64 @exchange_multi_use_64_1 (i16* %a , i16* %b , i64 %acc ) {
153
+ entry:
154
+ %addr.a.1 = getelementptr i16 , i16* %a , i32 1
155
+ %addr.b.1 = getelementptr i16 , i16* %b , i32 1
156
+ %ld.a.0 = load i16 , i16* %a
157
+ %sext.a.0 = sext i16 %ld.a.0 to i32
158
+ %ld.b.0 = load i16 , i16* %b
159
+ %ld.a.1 = load i16 , i16* %addr.a.1
160
+ %ld.b.1 = load i16 , i16* %addr.b.1
161
+ %sext.a.1 = sext i16 %ld.a.1 to i32
162
+ %sext.b.1 = sext i16 %ld.b.1 to i32
163
+ %sext.b.0 = sext i16 %ld.b.0 to i32
164
+ %mul.0 = mul i32 %sext.a.0 , %sext.b.1 ; a[0] * b[1]
165
+ %mul.1 = mul i32 %sext.a.1 , %sext.b.0 ; a[1] * b[0]: lanes crossed relative to mul.0
166
+ %add = add i32 %mul.0 , %mul.1
167
+ %addr.a.2 = getelementptr i16 , i16* %a , i32 2
168
+ %addr.a.3 = getelementptr i16 , i16* %a , i32 3
169
+ %ld.a.2 = load i16 , i16* %addr.a.2
170
+ %ld.a.3 = load i16 , i16* %addr.a.3
171
+ %sext.a.2 = sext i16 %ld.a.2 to i32
172
+ %sext.a.3 = sext i16 %ld.a.3 to i32
173
+ %mul.2 = mul i32 %sext.a.3 , %sext.b.1 ; a[3] * b[1]: second use of sext.b.1
174
+ %mul.3 = mul i32 %sext.a.2 , %sext.b.0 ; a[2] * b[0]: second use of sext.b.0
175
+ %add.1 = add i32 %mul.2 , %mul.3
176
+ %add.2 = add i32 %add , %add.1 ; both pair sums combined while still i32
177
+ %sext.add.2 = sext i32 %add.2 to i64 ; single widening of the combined sum
178
+ %res = add i64 %sext.add.2 , %acc ; 64-bit accumulate
179
+ ret i64 %res
180
+ }
181
+
182
+ ; CHECK-LABEL: exchange_multi_use_64_2
; Test input: same multiplies and operand pairings as exchange_multi_use_64_1,
; but each i32 pair sum is sign-extended to i64 on its own and the two widened
; values are added in i64 before %acc is added.
; The FileCheck patterns for this function expect the same call sequence:
; llvm.arm.smlaldx taking %acc, then llvm.arm.smlald accumulating onto it.
183
+ ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
184
+ ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
185
+ ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
186
+ ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
187
+ ; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
188
+ ; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
189
+ ; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
190
+ ; CHECK: [[X:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[LD_A]], i32 [[LD_B]], i64 %acc
191
+ ; CHECK: call i64 @llvm.arm.smlald(i32 [[LD_A_2]], i32 [[LD_B]], i64 [[X]])
192
+ define i64 @exchange_multi_use_64_2 (i16* %a , i16* %b , i64 %acc ) {
193
+ entry:
194
+ %addr.a.1 = getelementptr i16 , i16* %a , i32 1
195
+ %addr.b.1 = getelementptr i16 , i16* %b , i32 1
196
+ %ld.a.0 = load i16 , i16* %a
197
+ %sext.a.0 = sext i16 %ld.a.0 to i32
198
+ %ld.b.0 = load i16 , i16* %b
199
+ %ld.a.1 = load i16 , i16* %addr.a.1
200
+ %ld.b.1 = load i16 , i16* %addr.b.1
201
+ %sext.a.1 = sext i16 %ld.a.1 to i32
202
+ %sext.b.1 = sext i16 %ld.b.1 to i32
203
+ %sext.b.0 = sext i16 %ld.b.0 to i32
204
+ %mul.0 = mul i32 %sext.a.0 , %sext.b.1 ; a[0] * b[1]
205
+ %mul.1 = mul i32 %sext.a.1 , %sext.b.0 ; a[1] * b[0]: lanes crossed relative to mul.0
206
+ %add = add i32 %mul.0 , %mul.1
207
+ %sext.add = sext i32 %add to i64 ; first pair sum widened on its own
208
+ %addr.a.2 = getelementptr i16 , i16* %a , i32 2
209
+ %addr.a.3 = getelementptr i16 , i16* %a , i32 3
210
+ %ld.a.2 = load i16 , i16* %addr.a.2
211
+ %ld.a.3 = load i16 , i16* %addr.a.3
212
+ %sext.a.2 = sext i16 %ld.a.2 to i32
213
+ %sext.a.3 = sext i16 %ld.a.3 to i32
214
+ %mul.2 = mul i32 %sext.a.3 , %sext.b.1 ; a[3] * b[1]: second use of sext.b.1
215
+ %mul.3 = mul i32 %sext.a.2 , %sext.b.0 ; a[2] * b[0]: second use of sext.b.0
216
+ %add.1 = add i32 %mul.2 , %mul.3
217
+ %sext.add.1 = sext i32 %add.1 to i64 ; second pair sum widened on its own
218
+ %add.2 = add i64 %sext.add , %sext.add.1 ; pair sums combined in i64
219
+ %res = add i64 %add.2 , %acc ; 64-bit accumulate
220
+ ret i64 %res
221
+ }
222
+
142
223
; CHECK-LABEL: exchange_multi_use_2
143
224
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
144
225
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
@@ -216,6 +297,48 @@ entry:
216
297
ret i32 %res
217
298
}
218
299
300
+ ; TODO: Would it be better to generate a smlad and then sign extend it?
301
+ ; CHECK-LABEL: exchange_multi_use_64_3
; Test input: one straight pair (a[0]*b[0] + a[1]*b[1]) and one crossed pair
; (b[0]*a[3] + b[1]*a[2]), each summed in i32, widened separately to i64 and
; added together. The combined sum is then subtracted from %acc rather than
; added to it.
; The FileCheck patterns for this function expect llvm.arm.smlald with a
; literal 0 accumulator, followed by llvm.arm.smlaldx chained onto that
; result; %acc does not appear in either expected call.
; NOTE(review): presumably the sub is what keeps %acc out of the intrinsic
; chain -- confirm against the pass implementation.
302
+ ; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
303
+ ; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
304
+ ; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
305
+ ; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
306
+ ; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
307
+ ; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
308
+ ; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
309
+ ; CHECK: [[ACC:%[^ ]+]] = call i64 @llvm.arm.smlald(i32 [[LD_A]], i32 [[LD_B]], i64 0)
310
+ ; CHECK: [[X:%[^ ]+]] = call i64 @llvm.arm.smlaldx(i32 [[LD_B]], i32 [[LD_A_2]], i64 [[ACC]])
311
+ define i64 @exchange_multi_use_64_3 (i16* %a , i16* %b , i64 %acc ) {
312
+ entry:
313
+ %addr.a.1 = getelementptr i16 , i16* %a , i32 1
314
+ %addr.b.1 = getelementptr i16 , i16* %b , i32 1
315
+ %ld.a.0 = load i16 , i16* %a
316
+ %sext.a.0 = sext i16 %ld.a.0 to i32
317
+ %ld.b.0 = load i16 , i16* %b
318
+ %ld.a.1 = load i16 , i16* %addr.a.1
319
+ %ld.b.1 = load i16 , i16* %addr.b.1
320
+ %sext.a.1 = sext i16 %ld.a.1 to i32
321
+ %sext.b.1 = sext i16 %ld.b.1 to i32
322
+ %sext.b.0 = sext i16 %ld.b.0 to i32
323
+ %addr.a.2 = getelementptr i16 , i16* %a , i32 2
324
+ %addr.a.3 = getelementptr i16 , i16* %a , i32 3
325
+ %ld.a.2 = load i16 , i16* %addr.a.2
326
+ %ld.a.3 = load i16 , i16* %addr.a.3
327
+ %sext.a.2 = sext i16 %ld.a.2 to i32
328
+ %sext.a.3 = sext i16 %ld.a.3 to i32
329
+ %mul.2 = mul i32 %sext.b.0 , %sext.a.3 ; b[0] * a[3]
330
+ %mul.3 = mul i32 %sext.b.1 , %sext.a.2 ; b[1] * a[2]: lanes crossed relative to mul.2
331
+ %mul.0 = mul i32 %sext.a.0 , %sext.b.0 ; a[0] * b[0]
332
+ %mul.1 = mul i32 %sext.a.1 , %sext.b.1 ; a[1] * b[1]: same-lane pair with mul.0
333
+ %add = add i32 %mul.0 , %mul.1
334
+ %add.1 = add i32 %mul.2 , %mul.3
335
+ %sext.add = sext i32 %add to i64
336
+ %sext.add.1 = sext i32 %add.1 to i64
337
+ %add.2 = add i64 %sext.add , %sext.add.1 ; pair sums combined in i64
338
+ %res = sub i64 %acc , %add.2 ; subtract the sum from %acc (not an add)
339
+ ret i64 %res
340
+ }
341
+
219
342
; TODO: Why isn't smladx generated too?
220
343
; CHECK-LABEL: exchange_multi_use_4
221
344
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
0 commit comments