Skip to content

Commit b3fe00b

Browse files
author
Evan Cheng
committed
padds{b|w}, paddus{b|w}, psubs{b|w}, psubus{b|w} intrinsics.
llvm-svn: 27639
1 parent 0aab735 commit b3fe00b

File tree

2 files changed

+106
-8
lines changed

2 files changed

+106
-8
lines changed

llvm/include/llvm/IntrinsicsX86.td

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,34 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
250250
llvm_v2f64_ty], [IntrNoMem]>;
251251
}
252252

253+
// Integer arithmetic ops.
// Declares the SSE2 padds/paddus/psubs/psubus intrinsics, each in a byte (_b)
// and a word (_w) variant. Every def binds the intrinsic to a GCC builtin name
// through GCCBuiltin<...>, lists three identical vector types (v16i8 for _b,
// v8i16 for _w) and carries the IntrNoMem property.
// NOTE(review): the type list is presumably [result, operand, operand] --
// confirm against the Intrinsic class definition, which is not shown here.
254+
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
255+
def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">,
256+
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
257+
llvm_v16i8_ty], [IntrNoMem]>;
258+
def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">,
259+
Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
260+
llvm_v8i16_ty], [IntrNoMem]>;
261+
def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">,
262+
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
263+
llvm_v16i8_ty], [IntrNoMem]>;
264+
def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">,
265+
Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
266+
llvm_v8i16_ty], [IntrNoMem]>;
267+
def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">,
268+
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
269+
llvm_v16i8_ty], [IntrNoMem]>;
270+
def int_x86_sse2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw128">,
271+
Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
272+
llvm_v8i16_ty], [IntrNoMem]>;
273+
def int_x86_sse2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb128">,
274+
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
275+
llvm_v16i8_ty], [IntrNoMem]>;
276+
def int_x86_sse2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw128">,
277+
Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
278+
llvm_v8i16_ty], [IntrNoMem]>;
279+
}
280+
253281
// Integer shift ops.
254282
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
255283
def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">,

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 78 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,23 +1283,59 @@ def PADDQrr : PDI<0xD4, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
12831283
"paddq {$src2, $dst|$dst, $src2}",
12841284
[(set VR128:$dst, (v2i64 (add VR128:$src1, VR128:$src2)))]>;
12851285
}
1286-
def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1286+
// paddb with a memory second operand (MRMSrcMem, i128mem:$src2): selected for
// a v16i8 add of $src1 and the value loaded from addr:$src2.
def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
12871287
"paddb {$src2, $dst|$dst, $src2}",
12881288
[(set VR128:$dst, (v16i8 (add VR128:$src1,
12891289
(load addr:$src2))))]>;
1290-
def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1290+
// paddw with a memory second operand (MRMSrcMem, i128mem:$src2): selected for
// a v8i16 add of $src1 and the value loaded from addr:$src2.
def PADDWrm : PDI<0xFD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
12911291
"paddw {$src2, $dst|$dst, $src2}",
12921292
[(set VR128:$dst, (v8i16 (add VR128:$src1,
12931293
(load addr:$src2))))]>;
1294-
def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1294+
// paddd with a memory second operand (MRMSrcMem, i128mem:$src2): selected for
// a v4i32 add of $src1 and the value loaded from addr:$src2.
def PADDDrm : PDI<0xFE, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
12951295
"paddd {$src2, $dst|$dst, $src2}",
12961296
[(set VR128:$dst, (v4i32 (add VR128:$src1,
12971297
(load addr:$src2))))]>;
1298-
def PADDQrm : PDI<0xD4, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1298+
// paddq with a memory second operand (MRMSrcMem, i128mem:$src2): selected for
// a v2i64 add of $src1 and the value loaded from addr:$src2.
// The asm string must read "paddq": opcode 0xD4 and the v2i64 pattern are the
// same as PADDQrr, which prints "paddq". Printing "paddd" here would emit the
// wrong mnemonic for this instruction.
def PADDQrm : PDI<0xD4, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
12991299
"paddq {$src2, $dst|$dst, $src2}",
13001300
[(set VR128:$dst, (v2i64 (add VR128:$src1,
13011301
(load addr:$src2))))]>;
13021302

1303+
// Register-register forms of paddsb / paddsw / paddusb / paddusw. Each def
// selects the matching int_x86_sse2_padds_* / int_x86_sse2_paddus_* intrinsic
// applied to two VR128 operands. All four are wrapped in isCommutable = 1.
// NOTE(review): presumably this allows the two source operands to be swapped
// -- confirm against the isCommutable documentation.
let isCommutable = 1 in {
1304+
def PADDSBrr : PDI<0xEC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1305+
"paddsb {$src2, $dst|$dst, $src2}",
1306+
[(set VR128:$dst, (int_x86_sse2_padds_b VR128:$src1,
1307+
VR128:$src2))]>;
1308+
def PADDSWrr : PDI<0xED, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1309+
"paddsw {$src2, $dst|$dst, $src2}",
1310+
[(set VR128:$dst, (int_x86_sse2_padds_w VR128:$src1,
1311+
VR128:$src2))]>;
1312+
def PADDUSBrr : PDI<0xDC, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1313+
"paddusb {$src2, $dst|$dst, $src2}",
1314+
[(set VR128:$dst, (int_x86_sse2_paddus_b VR128:$src1,
1315+
VR128:$src2))]>;
1316+
def PADDUSWrr : PDI<0xDD, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1317+
"paddusw {$src2, $dst|$dst, $src2}",
1318+
[(set VR128:$dst, (int_x86_sse2_paddus_w VR128:$src1,
1319+
VR128:$src2))]>;
1320+
}
1321+
// Memory-operand (MRMSrcMem) forms of paddsb / paddsw / paddusb / paddusw.
// $src2 is an i128mem operand; the pattern loads it with loadv2i64 and wraps
// the load in bc_v16i8 (byte variants) or bc_v8i16 (word variants) before
// passing it to the intrinsic. These defs sit outside the isCommutable block
// that encloses the register-register forms.
// NOTE(review): bc_v16i8 / bc_v8i16 are presumably bitcast pattern fragments
// defined elsewhere in this file -- confirm.
def PADDSBrm : PDI<0xEC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1322+
"paddsb {$src2, $dst|$dst, $src2}",
1323+
[(set VR128:$dst, (int_x86_sse2_padds_b VR128:$src1,
1324+
(bc_v16i8 (loadv2i64 addr:$src2))))]>;
1325+
def PADDSWrm : PDI<0xED, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1326+
"paddsw {$src2, $dst|$dst, $src2}",
1327+
[(set VR128:$dst, (int_x86_sse2_padds_w VR128:$src1,
1328+
(bc_v8i16 (loadv2i64 addr:$src2))))]>;
1329+
def PADDUSBrm : PDI<0xDC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1330+
"paddusb {$src2, $dst|$dst, $src2}",
1331+
[(set VR128:$dst, (int_x86_sse2_paddus_b VR128:$src1,
1332+
(bc_v16i8 (loadv2i64 addr:$src2))))]>;
1333+
def PADDUSWrm : PDI<0xDD, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1334+
"paddusw {$src2, $dst|$dst, $src2}",
1335+
[(set VR128:$dst, (int_x86_sse2_paddus_w VR128:$src1,
1336+
(bc_v8i16 (loadv2i64 addr:$src2))))]>;
1337+
1338+
13031339
// psubb, register-register form (MRMSrcReg): selected for a v16i8 sub of
// $src1 and $src2.
def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
13041340
"psubb {$src2, $dst|$dst, $src2}",
13051341
[(set VR128:$dst, (v16i8 (sub VR128:$src1, VR128:$src2)))]>;
@@ -1313,22 +1349,56 @@ def PSUBQrr : PDI<0xFB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
13131349
"psubq {$src2, $dst|$dst, $src2}",
13141350
[(set VR128:$dst, (v2i64 (sub VR128:$src1, VR128:$src2)))]>;
13151351

1316-
def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1352+
// psubb with a memory second operand (MRMSrcMem, i128mem:$src2): selected for
// a v16i8 sub of $src1 and the value loaded from addr:$src2.
def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
13171353
"psubb {$src2, $dst|$dst, $src2}",
13181354
[(set VR128:$dst, (v16i8 (sub VR128:$src1,
13191355
(load addr:$src2))))]>;
1320-
def PSUBWrm : PDI<0xF9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1356+
// psubw with a memory second operand (MRMSrcMem, i128mem:$src2): selected for
// a v8i16 sub of $src1 and the value loaded from addr:$src2.
def PSUBWrm : PDI<0xF9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
13211357
"psubw {$src2, $dst|$dst, $src2}",
13221358
[(set VR128:$dst, (v8i16 (sub VR128:$src1,
13231359
(load addr:$src2))))]>;
1324-
def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1360+
// psubd with a memory second operand (MRMSrcMem, i128mem:$src2): selected for
// a v4i32 sub of $src1 and the value loaded from addr:$src2.
def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
13251361
"psubd {$src2, $dst|$dst, $src2}",
13261362
[(set VR128:$dst, (v4i32 (sub VR128:$src1,
13271363
(load addr:$src2))))]>;
1328-
def PSUBQrm : PDI<0xFB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
1364+
// psubq with a memory second operand (MRMSrcMem, i128mem:$src2): selected for
// a v2i64 sub of $src1 and the value loaded from addr:$src2.
// The asm string must read "psubq": opcode 0xFB and the v2i64 pattern are the
// same as PSUBQrr, which prints "psubq". Printing "psubd" here would emit the
// wrong mnemonic for this instruction.
def PSUBQrm : PDI<0xFB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
13291365
"psubq {$src2, $dst|$dst, $src2}",
13301366
[(set VR128:$dst, (v2i64 (sub VR128:$src1,
13311367
(load addr:$src2))))]>;
1368+
1369+
// Register-register forms of psubsb / psubsw / psubusb / psubusw. Each def
// selects the matching int_x86_sse2_psubs_* / int_x86_sse2_psubus_* intrinsic
// applied to two VR128 operands. Unlike the padds*/paddus* register forms, no
// isCommutable wrapper is visible around these defs.
def PSUBSBrr : PDI<0xE8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1370+
"psubsb {$src2, $dst|$dst, $src2}",
1371+
[(set VR128:$dst, (int_x86_sse2_psubs_b VR128:$src1,
1372+
VR128:$src2))]>;
1373+
def PSUBSWrr : PDI<0xE9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1374+
"psubsw {$src2, $dst|$dst, $src2}",
1375+
[(set VR128:$dst, (int_x86_sse2_psubs_w VR128:$src1,
1376+
VR128:$src2))]>;
1377+
def PSUBUSBrr : PDI<0xD8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1378+
"psubusb {$src2, $dst|$dst, $src2}",
1379+
[(set VR128:$dst, (int_x86_sse2_psubus_b VR128:$src1,
1380+
VR128:$src2))]>;
1381+
def PSUBUSWrr : PDI<0xD9, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
1382+
"psubusw {$src2, $dst|$dst, $src2}",
1383+
[(set VR128:$dst, (int_x86_sse2_psubus_w VR128:$src1,
1384+
VR128:$src2))]>;
1385+
1386+
// Memory-operand (MRMSrcMem) forms of psubsb / psubsw / psubusb / psubusw.
// $src2 is an i128mem operand; the pattern loads it with loadv2i64 and wraps
// the load in bc_v16i8 (byte variants) or bc_v8i16 (word variants) before
// passing it to the intrinsic.
// NOTE(review): bc_v16i8 / bc_v8i16 are presumably bitcast pattern fragments
// defined elsewhere in this file -- confirm.
def PSUBSBrm : PDI<0xE8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1387+
"psubsb {$src2, $dst|$dst, $src2}",
1388+
[(set VR128:$dst, (int_x86_sse2_psubs_b VR128:$src1,
1389+
(bc_v16i8 (loadv2i64 addr:$src2))))]>;
1390+
def PSUBSWrm : PDI<0xE9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1391+
"psubsw {$src2, $dst|$dst, $src2}",
1392+
[(set VR128:$dst, (int_x86_sse2_psubs_w VR128:$src1,
1393+
(bc_v8i16 (loadv2i64 addr:$src2))))]>;
1394+
def PSUBUSBrm : PDI<0xD8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1395+
"psubusb {$src2, $dst|$dst, $src2}",
1396+
[(set VR128:$dst, (int_x86_sse2_psubus_b VR128:$src1,
1397+
(bc_v16i8 (loadv2i64 addr:$src2))))]>;
1398+
def PSUBUSWrm : PDI<0xD9, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
1399+
"psubusw {$src2, $dst|$dst, $src2}",
1400+
[(set VR128:$dst, (int_x86_sse2_psubus_w VR128:$src1,
1401+
(bc_v8i16 (loadv2i64 addr:$src2))))]>;
13321402
}
13331403

13341404
let isTwoAddress = 1 in {

0 commit comments

Comments
 (0)