Skip to content

Commit 34b7c16

Browse files
committed
Merge 64314 from mainline.
Make a transformation added in 63266 a bit less aggressive. It was transforming (x&y)==y to (x&y)!=0 in the case where y is variable and known to have at most one bit set (e.g. z&1). This is not correct; the expressions are not equivalent when y==0: (x&0)==0 is always true, whereas (x&0)!=0 is always false. I believe this patch salvages what can be salvaged, including all the cases in bt.ll. llvm-svn: 64782
1 parent c9dfb07 commit 34b7c16

File tree

2 files changed

+79
-5
lines changed

2 files changed

+79
-5
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,20 +1346,21 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
13461346
return 1;
13471347
}
13481348

1349-
static bool ValueHasAtMostOneBitSet(SDValue Val, const SelectionDAG &DAG) {
1349+
static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
13501350
// Logical shift right or left won't ever introduce new set bits.
13511351
// We check for this case because we don't care which bits are
13521352
// set, but ComputeMaskedBits won't know anything unless it can
13531353
// determine which specific bits may be set.
13541354
if (Val.getOpcode() == ISD::SHL || Val.getOpcode() == ISD::SRL)
1355-
return ValueHasAtMostOneBitSet(Val.getOperand(0), DAG);
1355+
return ValueHasExactlyOneBitSet(Val.getOperand(0), DAG);
13561356

13571357
MVT OpVT = Val.getValueType();
13581358
unsigned BitWidth = OpVT.getSizeInBits();
13591359
APInt Mask = APInt::getAllOnesValue(BitWidth);
13601360
APInt KnownZero, KnownOne;
13611361
DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
1362-
return KnownZero.countPopulation() == BitWidth - 1;
1362+
return (KnownZero.countPopulation() == BitWidth - 1) &&
1363+
(KnownOne.countPopulation() == 1);
13631364
}
13641365

13651366
/// SimplifySetCC - Try to simplify a setcc built with the specified operands
@@ -1832,17 +1833,20 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
18321833
}
18331834

18341835
// Simplify (x&y) == y to (x&y) != 0 if y has exactly one bit set.
1836+
// Note that where y is variable and is known to have at most
1837+
// one bit set (for example, if it is z&1) we cannot do this;
1838+
// the expressions are not equivalent when y==0.
18351839
if (N0.getOpcode() == ISD::AND)
18361840
if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
1837-
if (ValueHasAtMostOneBitSet(N1, DAG)) {
1841+
if (ValueHasExactlyOneBitSet(N1, DAG)) {
18381842
Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
18391843
SDValue Zero = DAG.getConstant(0, N1.getValueType());
18401844
return DAG.getSetCC(dl, VT, N0, Zero, Cond);
18411845
}
18421846
}
18431847
if (N1.getOpcode() == ISD::AND)
18441848
if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
1845-
if (ValueHasAtMostOneBitSet(N0, DAG)) {
1849+
if (ValueHasExactlyOneBitSet(N0, DAG)) {
18461850
Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
18471851
SDValue Zero = DAG.getConstant(0, N0.getValueType());
18481852
return DAG.getSetCC(dl, VT, N1, Zero, Cond);

llvm/test/CodeGen/X86/nobt.ll

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; RUN: llvm-as < %s | llc -march=x86 | not grep btl
2+
3+
; This tests some cases where BT must not be generated. See also bt.ll.
4+
; Fixes 20040709-[12].c in gcc testsuite.
5+
6+
define void @test2(i32 %x, i32 %n) nounwind {
7+
entry:
8+
%tmp1 = and i32 %x, 1
9+
%tmp2 = urem i32 %tmp1, 15
10+
%tmp3 = and i32 %tmp2, 1 ; <i32> [#uses=1]
11+
%tmp4 = icmp eq i32 %tmp3, %tmp2 ; <i1> [#uses=1]
12+
br i1 %tmp4, label %bb, label %UnifiedReturnBlock
13+
14+
bb: ; preds = %entry
15+
call void @foo()
16+
ret void
17+
18+
UnifiedReturnBlock: ; preds = %entry
19+
ret void
20+
}
21+
22+
define void @test3(i32 %x, i32 %n) nounwind {
23+
entry:
24+
%tmp1 = and i32 %x, 1
25+
%tmp2 = urem i32 %tmp1, 15
26+
%tmp3 = and i32 %tmp2, 1 ; <i32> [#uses=1]
27+
%tmp4 = icmp eq i32 %tmp2, %tmp3 ; <i1> [#uses=1]
28+
br i1 %tmp4, label %bb, label %UnifiedReturnBlock
29+
30+
bb: ; preds = %entry
31+
call void @foo()
32+
ret void
33+
34+
UnifiedReturnBlock: ; preds = %entry
35+
ret void
36+
}
37+
38+
define void @test4(i32 %x, i32 %n) nounwind {
39+
entry:
40+
%tmp1 = and i32 %x, 1
41+
%tmp2 = urem i32 %tmp1, 15
42+
%tmp3 = and i32 %tmp2, 1 ; <i32> [#uses=1]
43+
%tmp4 = icmp ne i32 %tmp2, %tmp3 ; <i1> [#uses=1]
44+
br i1 %tmp4, label %bb, label %UnifiedReturnBlock
45+
46+
bb: ; preds = %entry
47+
call void @foo()
48+
ret void
49+
50+
UnifiedReturnBlock: ; preds = %entry
51+
ret void
52+
}
53+
54+
define void @test5(i32 %x, i32 %n) nounwind {
55+
entry:
56+
%tmp1 = and i32 %x, 1
57+
%tmp2 = urem i32 %tmp1, 15
58+
%tmp3 = and i32 %tmp2, 1 ; <i32> [#uses=1]
59+
%tmp4 = icmp ne i32 %tmp2, %tmp3 ; <i1> [#uses=1]
60+
br i1 %tmp4, label %bb, label %UnifiedReturnBlock
61+
62+
bb: ; preds = %entry
63+
call void @foo()
64+
ret void
65+
66+
UnifiedReturnBlock: ; preds = %entry
67+
ret void
68+
}
69+
70+
declare void @foo()

0 commit comments

Comments
 (0)