-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AMDGPU][GISel] Use buildObjectPtrOffset instead of buildPtrAdd #150899
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][GISel] Use buildObjectPtrOffset instead of buildPtrAdd #150899
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Fabian Ritter (ritter-x2a) ChangesThis concerns offset computations for kernargs and For SWDEV-516125. Patch is 113.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150899.diff 10 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index fedfa3f9dd900..3d494374fb33b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2295,8 +2295,8 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
LLT::scalar(32), commonAlignment(Align(64), Offset));
// Pointer address
- B.buildPtrAdd(LoadAddr, KernargPtrReg,
- B.buildConstant(LLT::scalar(64), Offset).getReg(0));
+ B.buildObjectPtrOffset(LoadAddr, KernargPtrReg,
+ B.buildConstant(LLT::scalar(64), Offset).getReg(0));
// Load address
return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
}
@@ -2317,8 +2317,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
MachineMemOperand::MOInvariant,
LLT::scalar(32), commonAlignment(Align(64), StructOffset));
- B.buildPtrAdd(LoadAddr, QueuePtr,
- B.buildConstant(LLT::scalar(64), StructOffset).getReg(0));
+ B.buildObjectPtrOffset(
+ LoadAddr, QueuePtr,
+ B.buildConstant(LLT::scalar(64), StructOffset).getReg(0));
return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
}
@@ -4500,8 +4501,7 @@ Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B,
llvm_unreachable("failed to find kernarg segment ptr");
auto COffset = B.buildConstant(LLT::scalar(64), Offset);
- // TODO: Should get nuw
- return B.buildPtrAdd(PtrTy, KernArgReg, COffset).getReg(0);
+ return B.buildObjectPtrOffset(PtrTy, KernArgReg, COffset).getReg(0);
}
/// Legalize a value that's loaded from kernel arguments. This is only used by
@@ -5676,8 +5676,8 @@ bool AMDGPULegalizerInfo::getImplicitArgPtr(Register DstReg,
AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR))
return false;
- // FIXME: This should be nuw
- B.buildPtrAdd(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
+ B.buildObjectPtrOffset(DstReg, KernargPtrReg,
+ B.buildConstant(IdxTy, Offset).getReg(0));
return true;
}
@@ -7019,8 +7019,8 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
// Pointer address
Register LoadAddr = MRI.createGenericVirtualRegister(
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
- B.buildPtrAdd(LoadAddr, KernargPtrReg,
- B.buildConstant(LLT::scalar(64), Offset).getReg(0));
+ B.buildObjectPtrOffset(LoadAddr, KernargPtrReg,
+ B.buildConstant(LLT::scalar(64), Offset).getReg(0));
// Load address
Register Temp = B.buildLoad(S64, LoadAddr, *MMO).getReg(0);
B.buildCopy(SGPR01, Temp);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index f471881ee7693..b45627d9c1c5d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -294,7 +294,8 @@ void RegBankLegalizeHelper::splitLoad(MachineInstr &MI,
BasePlusOffset = Base;
} else {
auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
- BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0);
+ BasePlusOffset =
+ B.buildObjectPtrOffset({PtrRB, PtrTy}, Base, Offset).getReg(0);
}
auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy);
auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index 3e7a5671bb5de..33a9c5e258ea2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -24,7 +24,7 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY9]], [[C]](s64)
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
@@ -65,7 +65,7 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY9]], [[C]](s64)
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
@@ -105,7 +105,7 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY4]], [[C]](s64)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index 33862de91430c..57ee2c8f88073 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -31,7 +31,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -84,7 +84,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -230,7 +230,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -319,7 +319,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -668,7 +668,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -710,7 +710,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -756,7 +756,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -802,7 +802,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -852,7 +852,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -898,7 +898,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -949,7 +949,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -996,7 +996,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1047,7 +1047,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1098,7 +1098,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1153,7 +1153,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1200,7 +1200,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 559391709f41d..6f624b2536f1a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -91,7 +91,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -174,7 +174,7 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -252,7 +252,7 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -314,7 +314,7 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -376,7 +376,7 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -456,7 +456,7 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -518,7 +518,7 @@ define amdgp...
[truncated]
|
Merge activity
|
7c58060
to
14a9067
Compare
4b74f7b
to
2edca34
Compare
f896ea3
to
94887b5
Compare
This concerns offset computations for kernargs and RegBankLegalizeHelper::splitLoad, which should all be within the bounds of a memory object. See #150392 for the motivation for introducing the buildObjectPtrOffset function. For SWDEV-516125.
94887b5
to
5f1adc2
Compare
This concerns offset computations for kernargs and
RegBankLegalizeHelper::splitLoad, which should all be within the bounds of a
memory object. See #150392 for the motivation for introducing the
buildObjectPtrOffset function.
For SWDEV-516125.