Skip to content

Commit 4bd186c

Browse files
author
QingShan Zhang
committed
[PowerPC] Exploit the rldicl + rldicl when and with mask
When we AND a value with a constant such as 0xFFFFFFC00000, we currently use several instructions to materialize this 48-bit constant, followed by a final "and". However, we can get the same result with two rotate instructions.

       MB          ME               MB+63-ME
+----------------------+     +----------------------+
|0000001111111111111000| ->  |0000000001111111111111|
+----------------------+     +----------------------+
 0                    63      0                    63

First rotate left by ME + 1 bits, then mask the result with (MB + 63 - ME, 63), and finally rotate back. Note that MB + 63 - ME must be reduced modulo 64 to handle the wrapping case.

Reviewed by: ChenZheng, Nemanjai

Differential Revision: https://reviews.llvm.org/D71831
1 parent 5034df8 commit 4bd186c

File tree

6 files changed

+83
-45
lines changed

6 files changed

+83
-45
lines changed

llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ namespace {
351351
bool tryAsSingleRLWINM(SDNode *N);
352352
bool tryAsSingleRLWINM8(SDNode *N);
353353
bool tryAsSingleRLWIMI(SDNode *N);
354+
bool tryAsPairOfRLDICL(SDNode *N);
354355

355356
void PeepholePPC64();
356357
void PeepholePPC64ZExt();
@@ -4439,6 +4440,60 @@ bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
44394440
return false;
44404441
}
44414442

4443+
/// Try to select the ISD::AND node \p N, whose second operand is a constant
/// mask, as a pair of RLDICL machine nodes: rotate so that the kept bits form
/// a low-order run, clear the unwanted bits on the left, then rotate back and
/// clear the original leading zeros of the mask.
///
/// \returns true if \p N was replaced via SelectNodeTo; false (with \p N left
/// untouched) if operand 1 is not accepted by isInt64Immediate, if the
/// immediate fits in 16 unsigned bits, or if the mask with its leading zeros
/// filled in is rejected by isRunOfOnes64.
bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
4444+
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4445+
uint64_t Imm64;
4446+
// Bail out unless isInt64Immediate extracts the mask from operand 1 into Imm64.
if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
4447+
return false;
4448+
4449+
// Do nothing if it is a 16-bit immediate, as the pattern in the .td file
4450+
// handles it well with "andi.".
4451+
if (isUInt<16>(Imm64))
4452+
return false;
4453+
4454+
SDLoc Loc(N);
4455+
SDValue Val = N->getOperand(0);
4456+
4457+
// Optimized with two rldicl's as follows:
4458+
// Add missing bits on left to the mask and check that the mask is a
4459+
// wrapped run of ones, i.e.
4460+
// Change pattern |0001111100000011111111|
4461+
//             to |1111111100000011111111|.
4462+
unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
4463+
if (NumOfLeadingZeros != 0)
4464+
Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
4465+
4466+
// MB and ME are filled in by isRunOfOnes64 and used below as the first and
// last bit of the run, numbered from the most significant bit as in the
// diagrams.
unsigned MB, ME;
4467+
if (!isRunOfOnes64(Imm64, MB, ME))
4468+
return false;
4469+
4470+
//        ME     MB                   MB-ME+63
4471+
// +----------------------+     +----------------------+
4472+
// |1111111100000011111111| ->  |0000001111111111111111|
4473+
// +----------------------+     +----------------------+
4474+
//  0                    63      0                    63
4475+
// There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
// (MB - ME + 63) & 63 is (MB - ME - 1) modulo 64, so the unsigned arithmetic
// also yields the right count when MB is smaller than ME.
// NOTE(review): a mask that is only a low-order run of ones would become all
// ones after the fill above, which would presumably make OnesOnLeft 64; Select
// tries tryAsSingleRLDICL before this function, which presumably takes those
// masks first -- confirm.
4476+
unsigned OnesOnLeft = ME + 1;
4477+
unsigned ZerosInBetween = (MB - ME + 63) & 63;
4478+
// Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
4479+
// on the left the bits that are already zeros in the mask.
4480+
Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
4481+
getI64Imm(OnesOnLeft, Loc),
4482+
getI64Imm(ZerosInBetween, Loc)),
4483+
0);
4484+
//        MB-ME+63                     ME     MB
4485+
// +----------------------+     +----------------------+
4486+
// |0000001111111111111111| ->  |0001111100000011111111|
4487+
// +----------------------+     +----------------------+
4488+
//  0                    63      0                    63
4489+
// Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
4490+
// left the number of ones we previously added.
4491+
SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
4492+
getI64Imm(NumOfLeadingZeros, Loc)};
4493+
// Replace N with the second RLDICL, which takes the first RLDICL (Val) as its
// input.
CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
4494+
return true;
4495+
}
4496+
44424497
bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
44434498
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
44444499
unsigned Imm;
@@ -4766,7 +4821,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
47664821
case ISD::AND:
47674822
// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
47684823
if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
4769-
tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N))
4824+
tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
47704825
return;
47714826

47724827
// Other cases are autogenerated.

llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
77
%typ = type { i32, i32 }
88

99
; On release builds, it doesn't crash, spewing nonsense instead.
10-
; To make sure it works, check that and is still alive.
11-
; CHECK: and
10+
; To make sure it works, check that rldicl is still alive.
11+
; CHECK: rldicl
1212
; Also, in release, it emits a COPY from a 32-bit register to
1313
; a 64-bit register, which happens to be emitted as cror [!]
1414
; by the confused CodeGen. Just to be sure, check there isn't one.

llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,13 @@ define i32* @f1(i32 %n) nounwind {
4343
; PPC64-LINUX-LABEL: f1
4444
; PPC64-LINUX: std 31, -8(1)
4545
; PPC64-LINUX-NEXT: stdu 1, -64(1)
46-
; PPC64-LINUX-NEXT: lis 4, 32767
4746
; PPC64-LINUX-NEXT: rldic 3, 3, 2, 30
48-
; PPC64-LINUX-NEXT: ori 4, 4, 65535
49-
; PPC64-LINUX-NEXT: addi 3, 3, 15
50-
; PPC64-LINUX-NEXT: sldi 4, 4, 4
5147
; PPC64-LINUX-NEXT: mr 31, 1
52-
; PPC64-LINUX-NEXT: and 3, 3, 4
53-
; PPC64-LINUX-NEXT: neg 3, 3
48+
; PPC64-LINUX-NEXT: addi 3, 3, 15
49+
; PPC64-LINUX-NEXT: rldicl 3, 3, 60, 4
5450
; PPC64-LINUX-NEXT: addi 4, 31, 64
51+
; PPC64-LINUX-NEXT: rldicl 3, 3, 4, 29
52+
; PPC64-LINUX-NEXT: neg 3, 3
5553
; PPC64-LINUX-NEXT: stdux 4, 1, 3
5654

5755
; The linkage area is always put on the top of the stack.
@@ -82,14 +80,12 @@ define i32* @f1(i32 %n) nounwind {
8280
; PPC64-AIX-LABEL: f1
8381
; PPC64-AIX: std 31, -8(1)
8482
; PPC64-AIX-NEXT: stdu 1, -64(1)
85-
; PPC64-AIX-NEXT: lis 4, 32767
8683
; PPC64-AIX-NEXT: rldic 3, 3, 2, 30
87-
; PPC64-AIX-NEXT: ori 4, 4, 65535
88-
; PPC64-AIX-NEXT: addi 3, 3, 15
89-
; PPC64-AIX-NEXT: sldi 4, 4, 4
9084
; PPC64-AIX-NEXT: mr 31, 1
91-
; PPC64-AIX-NEXT: and 3, 3, 4
85+
; PPC64-AIX-NEXT: addi 3, 3, 15
9286
; PPC64-AIX-NEXT: addi 4, 31, 64
87+
; PPC64-AIX-NEXT: rldicl 3, 3, 60, 4
88+
; PPC64-AIX-NEXT: rldicl 3, 3, 4, 29
9389
; PPC64-AIX-NEXT: neg 3, 3
9490
; PPC64-AIX-NEXT: stdux 4, 1, 3
9591

llvm/test/CodeGen/PowerPC/and-mask.ll

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ define i32 @test1(i32 %a) {
1515
define i64 @test2(i64 %a) {
1616
; CHECK-LABEL: test2:
1717
; CHECK: # %bb.0:
18-
; CHECK-NEXT: li 4, -7
19-
; CHECK-NEXT: and 3, 3, 4
18+
; CHECK-NEXT: rldicl 3, 3, 61, 2
19+
; CHECK-NEXT: rotldi 3, 3, 3
2020
; CHECK-NEXT: blr
2121
%and = and i64 %a, -7
2222
ret i64 %and
@@ -26,10 +26,8 @@ define i64 @test2(i64 %a) {
2626
define i64 @test3(i64 %a) {
2727
; CHECK-LABEL: test3:
2828
; CHECK: # %bb.0:
29-
; CHECK-NEXT: lis 4, 1023
30-
; CHECK-NEXT: ori 4, 4, 65535
31-
; CHECK-NEXT: sldi 4, 4, 22
32-
; CHECK-NEXT: and 3, 3, 4
29+
; CHECK-NEXT: rldicl 3, 3, 42, 22
30+
; CHECK-NEXT: rldicl 3, 3, 22, 16
3331
; CHECK-NEXT: blr
3432
%and = and i64 %a, 281474972516352
3533
ret i64 %and
@@ -39,10 +37,8 @@ define i64 @test3(i64 %a) {
3937
define i64 @test4(i64 %a) {
4038
; CHECK-LABEL: test4:
4139
; CHECK: # %bb.0:
42-
; CHECK-NEXT: li 4, 12
43-
; CHECK-NEXT: sldi 4, 4, 32
44-
; CHECK-NEXT: ori 4, 4, 255
45-
; CHECK-NEXT: and 3, 3, 4
40+
; CHECK-NEXT: rldicl 3, 3, 30, 26
41+
; CHECK-NEXT: rldicl 3, 3, 34, 28
4642
; CHECK-NEXT: blr
4743
%and = and i64 %a, 51539607807
4844
ret i64 %and
@@ -52,10 +48,8 @@ define i64 @test4(i64 %a) {
5248
define i64 @test5(i64 %a) {
5349
; CHECK-LABEL: test5:
5450
; CHECK: # %bb.0:
55-
; CHECK-NEXT: li 4, 0
56-
; CHECK-NEXT: oris 4, 4, 65472
57-
; CHECK-NEXT: ori 4, 4, 65535
58-
; CHECK-NEXT: and 3, 3, 4
51+
; CHECK-NEXT: rldicl 3, 3, 42, 6
52+
; CHECK-NEXT: rldicl 3, 3, 22, 32
5953
; CHECK-NEXT: blr
6054
%and = and i64 %a, 4290838527
6155
ret i64 %and
@@ -77,11 +71,8 @@ define i64 @test6(i64 %a) {
7771
define i64 @test7(i64 %a) {
7872
; CHECK-LABEL: test7:
7973
; CHECK: # %bb.0:
80-
; CHECK-NEXT: li 4, -32767
81-
; CHECK-NEXT: sldi 4, 4, 32
82-
; CHECK-NEXT: oris 4, 4, 65024
83-
; CHECK-NEXT: rldicr 4, 4, 17, 63
84-
; CHECK-NEXT: and 3, 3, 4
74+
; CHECK-NEXT: rldicl 3, 3, 22, 25
75+
; CHECK-NEXT: rldicl 3, 3, 42, 14
8576
; CHECK-NEXT: blr
8677
%and = and i64 %a, 1121501860462591
8778
ret i64 %and

llvm/test/CodeGen/PowerPC/cmpb.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -123,11 +123,9 @@ entry:
123123
ret i32 %or55
124124

125125
; CHECK-LABEL: @test32p1
126-
; CHECK: li [[REG1:[0-9]+]], 0
127-
; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
128-
; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287
129-
; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
130-
; CHECK: and 3, [[REG4]], [[REG3]]
126+
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
127+
; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 5
128+
; CHECK: rldicl 3, [[REG2]], 24, 32
131129
; CHECK: blr
132130
}
133131

@@ -147,11 +145,9 @@ entry:
147145
ret i32 %or37
148146

149147
; CHECK-LABEL: @test32p2
150-
; CHECK: li [[REG1:[0-9]+]], 0
151-
; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
152-
; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280
153-
; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
154-
; CHECK: and 3, [[REG4]], [[REG3]]
148+
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
149+
; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 8
150+
; CHECK: rldicl 3, [[REG2]], 24, 32
155151
; CHECK: blr
156152
}
157153

llvm/test/CodeGen/PowerPC/setcc-logic.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -481,9 +481,9 @@ define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32>
481481
define i1 @or_icmps_const_1bit_diff(i64 %x) {
482482
; CHECK-LABEL: or_icmps_const_1bit_diff:
483483
; CHECK: # %bb.0:
484-
; CHECK-NEXT: li 4, -5
485484
; CHECK-NEXT: addi 3, 3, -13
486-
; CHECK-NEXT: and 3, 3, 4
485+
; CHECK-NEXT: rldicl 3, 3, 61, 1
486+
; CHECK-NEXT: rotldi 3, 3, 3
487487
; CHECK-NEXT: cntlzd 3, 3
488488
; CHECK-NEXT: rldicl 3, 3, 58, 63
489489
; CHECK-NEXT: blr

0 commit comments

Comments
 (0)