Skip to content

Commit d11ba7a

Browse files
committed
change approach
1 parent d36dadd commit d11ba7a

File tree

3 files changed

+33
-35
lines changed

3 files changed

+33
-35
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15244,21 +15244,31 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
1524415244
}
1524515245
}
1524615246

15247-
// If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
15248-
// than X, and the And doesn't change the lower iX bits, we can move the
15249-
// AssertZext in front of the And and drop the AssertSext.
1525015247
if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
15251-
N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::AssertSext &&
15252-
isa<ConstantSDNode>(N0.getOperand(1))) {
15253-
SDValue BigA = N0.getOperand(0);
15254-
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15248+
N0.hasOneUse() && isa<ConstantSDNode>(N0.getOperand(1))) {
1525515249
const APInt &Mask = N0.getConstantOperandAPInt(1);
15256-
if (AssertVT.bitsLT(BigA_AssertVT) &&
15257-
Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
15250+
15251+
// If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
15252+
// than X, and the And doesn't change the lower iX bits, we can move the
15253+
// AssertZext in front of the And and drop the AssertSext.
15254+
if (N0.getOperand(0).getOpcode() == ISD::AssertSext) {
15255+
SDValue BigA = N0.getOperand(0);
15256+
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15257+
if (AssertVT.bitsLT(BigA_AssertVT) &&
15258+
Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
15259+
SDLoc DL(N);
15260+
SDValue NewAssert =
15261+
DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
15262+
return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
15263+
N0.getOperand(1));
15264+
}
15265+
}
15266+
15267+
// Remove AssertZext entirely if the mask guarantees the assertion cannot
15268+
// fail.
15269+
if (Mask.isMask() && Mask.countr_one() <= AssertVT.getScalarSizeInBits()) {
1525815270
SDLoc DL(N);
15259-
SDValue NewAssert =
15260-
DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
15261-
return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
15271+
return DAG.getNode(ISD::AND, DL, N0.getValueType(), N0.getOperand(0),
1526215272
N0.getOperand(1));
1526315273
}
1526415274
}

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3990,24 +3990,6 @@ SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
39903990
}
39913991
}
39923992

3993-
// AssertZext in front of these intrinsics is not necessary; the lowering of
3994-
// the intrinsics into a register read will insert one if it is needed.
3995-
if (N->getOpcode() == ISD::AssertZext &&
3996-
N0.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3997-
unsigned IID = N0.getConstantOperandVal(0);
3998-
switch (IID) {
3999-
case Intrinsic::amdgcn_workitem_id_x:
4000-
case Intrinsic::amdgcn_workitem_id_y:
4001-
case Intrinsic::amdgcn_workitem_id_z:
4002-
case Intrinsic::amdgcn_workgroup_id_x:
4003-
case Intrinsic::amdgcn_workgroup_id_y:
4004-
case Intrinsic::amdgcn_workgroup_id_z:
4005-
return N0;
4006-
default:
4007-
break;
4008-
}
4009-
}
4010-
40113993
return SDValue();
40123994
}
40133995

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,16 @@ declare i32 @llvm.amdgcn.workitem.id.z()
99
; FIXME: It's not worth adding AssertZext to the intrinsic calls, and
1010
; we don't fold out assertzext undef -> undef
1111
define amdgpu_ps void @undefined_workitems(ptr addrspace(1) %p, ptr addrspace(1) %q, ptr addrspace(1) %r) {
12-
; CHECK-LABEL: undefined_workitems:
13-
; CHECK: ; %bb.0:
14-
; CHECK-NEXT: s_endpgm
12+
; SDAG-LABEL: undefined_workitems:
13+
; SDAG: ; %bb.0:
14+
; SDAG-NEXT: global_store_dword v[0:1], v0, off
15+
; SDAG-NEXT: global_store_dword v[2:3], v0, off
16+
; SDAG-NEXT: global_store_dword v[4:5], v0, off
17+
; SDAG-NEXT: s_endpgm
18+
;
19+
; GISEL-LABEL: undefined_workitems:
20+
; GISEL: ; %bb.0:
21+
; GISEL-NEXT: s_endpgm
1522
%id.x = call i32 @llvm.amdgcn.workitem.id.x()
1623
store i32 %id.x, ptr addrspace(1) %p
1724
%id.y = call i32 @llvm.amdgcn.workitem.id.y()
@@ -21,5 +28,4 @@ define amdgpu_ps void @undefined_workitems(ptr addrspace(1) %p, ptr addrspace(1)
2128
ret void
2229
}
2330
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
24-
; GISEL: {{.*}}
25-
; SDAG: {{.*}}
31+
; CHECK: {{.*}}

0 commit comments

Comments
 (0)