@@ -199,139 +199,17 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
199
199
; SIMD128-LABEL: mul_v16i8:
200
200
; SIMD128: .functype mul_v16i8 (v128, v128) -> (v128)
201
201
; SIMD128-NEXT: # %bb.0:
202
- ; SIMD128-NEXT: i8x16.extract_lane_u $push4=, $0, 0
203
- ; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $1, 0
204
- ; SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3
205
- ; SIMD128-NEXT: i8x16.splat $push6=, $pop5
206
- ; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 1
207
- ; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 1
208
- ; SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0
209
- ; SIMD128-NEXT: i8x16.replace_lane $push7=, $pop6, 1, $pop2
210
- ; SIMD128-NEXT: i8x16.extract_lane_u $push9=, $0, 2
211
- ; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $1, 2
212
- ; SIMD128-NEXT: i32.mul $push10=, $pop9, $pop8
213
- ; SIMD128-NEXT: i8x16.replace_lane $push11=, $pop7, 2, $pop10
214
- ; SIMD128-NEXT: i8x16.extract_lane_u $push13=, $0, 3
215
- ; SIMD128-NEXT: i8x16.extract_lane_u $push12=, $1, 3
216
- ; SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12
217
- ; SIMD128-NEXT: i8x16.replace_lane $push15=, $pop11, 3, $pop14
218
- ; SIMD128-NEXT: i8x16.extract_lane_u $push17=, $0, 4
219
- ; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $1, 4
220
- ; SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16
221
- ; SIMD128-NEXT: i8x16.replace_lane $push19=, $pop15, 4, $pop18
222
- ; SIMD128-NEXT: i8x16.extract_lane_u $push21=, $0, 5
223
- ; SIMD128-NEXT: i8x16.extract_lane_u $push20=, $1, 5
224
- ; SIMD128-NEXT: i32.mul $push22=, $pop21, $pop20
225
- ; SIMD128-NEXT: i8x16.replace_lane $push23=, $pop19, 5, $pop22
226
- ; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $0, 6
227
- ; SIMD128-NEXT: i8x16.extract_lane_u $push24=, $1, 6
228
- ; SIMD128-NEXT: i32.mul $push26=, $pop25, $pop24
229
- ; SIMD128-NEXT: i8x16.replace_lane $push27=, $pop23, 6, $pop26
230
- ; SIMD128-NEXT: i8x16.extract_lane_u $push29=, $0, 7
231
- ; SIMD128-NEXT: i8x16.extract_lane_u $push28=, $1, 7
232
- ; SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28
233
- ; SIMD128-NEXT: i8x16.replace_lane $push31=, $pop27, 7, $pop30
234
- ; SIMD128-NEXT: i8x16.extract_lane_u $push33=, $0, 8
235
- ; SIMD128-NEXT: i8x16.extract_lane_u $push32=, $1, 8
236
- ; SIMD128-NEXT: i32.mul $push34=, $pop33, $pop32
237
- ; SIMD128-NEXT: i8x16.replace_lane $push35=, $pop31, 8, $pop34
238
- ; SIMD128-NEXT: i8x16.extract_lane_u $push37=, $0, 9
239
- ; SIMD128-NEXT: i8x16.extract_lane_u $push36=, $1, 9
240
- ; SIMD128-NEXT: i32.mul $push38=, $pop37, $pop36
241
- ; SIMD128-NEXT: i8x16.replace_lane $push39=, $pop35, 9, $pop38
242
- ; SIMD128-NEXT: i8x16.extract_lane_u $push41=, $0, 10
243
- ; SIMD128-NEXT: i8x16.extract_lane_u $push40=, $1, 10
244
- ; SIMD128-NEXT: i32.mul $push42=, $pop41, $pop40
245
- ; SIMD128-NEXT: i8x16.replace_lane $push43=, $pop39, 10, $pop42
246
- ; SIMD128-NEXT: i8x16.extract_lane_u $push45=, $0, 11
247
- ; SIMD128-NEXT: i8x16.extract_lane_u $push44=, $1, 11
248
- ; SIMD128-NEXT: i32.mul $push46=, $pop45, $pop44
249
- ; SIMD128-NEXT: i8x16.replace_lane $push47=, $pop43, 11, $pop46
250
- ; SIMD128-NEXT: i8x16.extract_lane_u $push49=, $0, 12
251
- ; SIMD128-NEXT: i8x16.extract_lane_u $push48=, $1, 12
252
- ; SIMD128-NEXT: i32.mul $push50=, $pop49, $pop48
253
- ; SIMD128-NEXT: i8x16.replace_lane $push51=, $pop47, 12, $pop50
254
- ; SIMD128-NEXT: i8x16.extract_lane_u $push53=, $0, 13
255
- ; SIMD128-NEXT: i8x16.extract_lane_u $push52=, $1, 13
256
- ; SIMD128-NEXT: i32.mul $push54=, $pop53, $pop52
257
- ; SIMD128-NEXT: i8x16.replace_lane $push55=, $pop51, 13, $pop54
258
- ; SIMD128-NEXT: i8x16.extract_lane_u $push57=, $0, 14
259
- ; SIMD128-NEXT: i8x16.extract_lane_u $push56=, $1, 14
260
- ; SIMD128-NEXT: i32.mul $push58=, $pop57, $pop56
261
- ; SIMD128-NEXT: i8x16.replace_lane $push59=, $pop55, 14, $pop58
262
- ; SIMD128-NEXT: i8x16.extract_lane_u $push61=, $0, 15
263
- ; SIMD128-NEXT: i8x16.extract_lane_u $push60=, $1, 15
264
- ; SIMD128-NEXT: i32.mul $push62=, $pop61, $pop60
265
- ; SIMD128-NEXT: i8x16.replace_lane $push63=, $pop59, 15, $pop62
266
- ; SIMD128-NEXT: return $pop63
202
+ ; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push1=, $0, $1
203
+ ; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push0=, $0, $1
204
+ ; SIMD128-NEXT: i8x16.shuffle $push2=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
205
+ ; SIMD128-NEXT: return $pop2
267
206
;
268
207
; SIMD128-FAST-LABEL: mul_v16i8:
269
208
; SIMD128-FAST: .functype mul_v16i8 (v128, v128) -> (v128)
270
209
; SIMD128-FAST-NEXT: # %bb.0:
271
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push5=, $0, 0
272
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push4=, $1, 0
273
- ; SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
274
- ; SIMD128-FAST-NEXT: i8x16.splat $push7=, $pop6
275
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push2=, $0, 1
276
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push1=, $1, 1
277
- ; SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1
278
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push8=, $pop7, 1, $pop3
279
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push10=, $0, 2
280
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push9=, $1, 2
281
- ; SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9
282
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push12=, $pop8, 2, $pop11
283
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push14=, $0, 3
284
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push13=, $1, 3
285
- ; SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13
286
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push16=, $pop12, 3, $pop15
287
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push18=, $0, 4
288
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push17=, $1, 4
289
- ; SIMD128-FAST-NEXT: i32.mul $push19=, $pop18, $pop17
290
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push20=, $pop16, 4, $pop19
291
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push22=, $0, 5
292
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push21=, $1, 5
293
- ; SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21
294
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push24=, $pop20, 5, $pop23
295
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push26=, $0, 6
296
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push25=, $1, 6
297
- ; SIMD128-FAST-NEXT: i32.mul $push27=, $pop26, $pop25
298
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push28=, $pop24, 6, $pop27
299
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push30=, $0, 7
300
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push29=, $1, 7
301
- ; SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29
302
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push32=, $pop28, 7, $pop31
303
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push34=, $0, 8
304
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push33=, $1, 8
305
- ; SIMD128-FAST-NEXT: i32.mul $push35=, $pop34, $pop33
306
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push36=, $pop32, 8, $pop35
307
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push38=, $0, 9
308
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push37=, $1, 9
309
- ; SIMD128-FAST-NEXT: i32.mul $push39=, $pop38, $pop37
310
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push40=, $pop36, 9, $pop39
311
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push42=, $0, 10
312
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push41=, $1, 10
313
- ; SIMD128-FAST-NEXT: i32.mul $push43=, $pop42, $pop41
314
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push44=, $pop40, 10, $pop43
315
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push46=, $0, 11
316
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push45=, $1, 11
317
- ; SIMD128-FAST-NEXT: i32.mul $push47=, $pop46, $pop45
318
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push48=, $pop44, 11, $pop47
319
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push50=, $0, 12
320
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push49=, $1, 12
321
- ; SIMD128-FAST-NEXT: i32.mul $push51=, $pop50, $pop49
322
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push52=, $pop48, 12, $pop51
323
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push54=, $0, 13
324
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push53=, $1, 13
325
- ; SIMD128-FAST-NEXT: i32.mul $push55=, $pop54, $pop53
326
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push56=, $pop52, 13, $pop55
327
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push58=, $0, 14
328
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push57=, $1, 14
329
- ; SIMD128-FAST-NEXT: i32.mul $push59=, $pop58, $pop57
330
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push60=, $pop56, 14, $pop59
331
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push62=, $0, 15
332
- ; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push61=, $1, 15
333
- ; SIMD128-FAST-NEXT: i32.mul $push63=, $pop62, $pop61
334
- ; SIMD128-FAST-NEXT: i8x16.replace_lane $push0=, $pop60, 15, $pop63
210
+ ; SIMD128-FAST-NEXT: i16x8.extmul_low_i8x16_u $push2=, $0, $1
211
+ ; SIMD128-FAST-NEXT: i16x8.extmul_high_i8x16_u $push1=, $0, $1
212
+ ; SIMD128-FAST-NEXT: i8x16.shuffle $push0=, $pop2, $pop1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
335
213
; SIMD128-FAST-NEXT: return $pop0
336
214
;
337
215
; NO-SIMD128-LABEL: mul_v16i8:
0 commit comments