-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AMDGPU][Verifier] Limit kill/wqm.demote intrinsics to PS shaders #151922
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Only amdgpu_ps shaders should be calling llvm.amdgcn.kill and llvm.amdgcn.wqm.demote. Enforce this through the verifier.
@llvm/pr-subscribers-llvm-ir Author: Carl Ritson (perlfu) ChangesOnly amdgpu_ps shaders should be calling llvm.amdgcn.kill and llvm.amdgcn.wqm.demote. Enforce this through the verifier. Full diff: https://github.com/llvm/llvm-project/pull/151922.diff 6 Files Affected:
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3ff9895e161c4..90c60b5d64841 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6714,6 +6714,13 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"invalid vector type for format", &Call, Src0, Call.getArgOperand(0));
Check(Src1Ty->getNumElements() >= getFormatNumRegs(FmtB),
"invalid vector type for format", &Call, Src1, Call.getArgOperand(2));
+ }
+ case Intrinsic::amdgcn_kill:
+ case Intrinsic::amdgcn_wqm_demote: {
+ Check(Call.getCaller()->getCallingConv() == CallingConv::AMDGPU_PS,
+ "Intrinsic can only be used from functions with the amdgpu_ps"
+ " calling convention ",
+ &Call);
break;
}
case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
diff --git a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
index b63fff38f34f6..8fb7c5daf081e 100644
--- a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -145,10 +145,10 @@ define amdgpu_kernel void @test_flush_f64_outputs(ptr addrspace(1) %out0, ptr ad
ret void
}
-; GCN-LABEL: {{^}}kill_gs_const:
+; GCN-LABEL: {{^}}kill_ps_const:
; GCN: FloatMode: 240
; GCN: IeeeMode: 0
-define amdgpu_gs void @kill_gs_const() {
+define amdgpu_ps void @kill_ps_const() {
main_body:
%cmp0 = icmp ule i32 0, 3
call void @llvm.amdgcn.kill(i1 %cmp0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
index 462090c6e89df..6d85cdceec4c6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
@@ -3,10 +3,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
-; GCN-LABEL: {{^}}gs_const:
+; GCN-LABEL: {{^}}ps_const:
; GCN-NOT: v_cmpx
; GCN: s_mov_b64 exec, 0
-define amdgpu_gs void @gs_const() {
+define amdgpu_ps void @ps_const() {
%tmp = icmp ule i32 0, 3
%tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
%c1 = fcmp oge float %tmp1, 0.0
@@ -37,7 +37,7 @@ define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) {
; GCN-LABEL: {{^}}true:
; GCN-NEXT: %bb.
; GCN-NEXT: s_endpgm
-define amdgpu_gs void @true() {
+define amdgpu_ps void @true() {
call void @llvm.amdgcn.kill(i1 true)
ret void
}
@@ -45,7 +45,7 @@ define amdgpu_gs void @true() {
; GCN-LABEL: {{^}}false:
; GCN-NOT: v_cmpx
; GCN: s_mov_b64 exec, 0
-define amdgpu_gs void @false() {
+define amdgpu_ps void @false() {
call void @llvm.amdgcn.kill(i1 false)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
ret void
@@ -58,7 +58,7 @@ define amdgpu_gs void @false() {
; GCN: s_and{{n2|_not1}}_b64 s[0:1], exec, s[0:1]
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
; GCN: s_and_b64 exec, exec, s[2:3]
-define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_ps void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
%x = or i1 %c1, %c2
@@ -73,7 +73,7 @@ define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
; GCN: s_xor_b64 s[0:1]
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
; GCN: s_and_b64 exec, exec, s[2:3]
-define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_ps void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
%x = xor i1 %c1, %c2
@@ -85,7 +85,7 @@ define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
; GCN-LABEL: {{^}}oeq:
; GCN: v_cmp_neq_f32
-define amdgpu_gs void @oeq(float %a) {
+define amdgpu_ps void @oeq(float %a) {
%c1 = fcmp oeq float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -94,7 +94,7 @@ define amdgpu_gs void @oeq(float %a) {
; GCN-LABEL: {{^}}ogt:
; GCN: v_cmp_nlt_f32
-define amdgpu_gs void @ogt(float %a) {
+define amdgpu_ps void @ogt(float %a) {
%c1 = fcmp ogt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -103,7 +103,7 @@ define amdgpu_gs void @ogt(float %a) {
; GCN-LABEL: {{^}}oge:
; GCN: v_cmp_nle_f32
-define amdgpu_gs void @oge(float %a) {
+define amdgpu_ps void @oge(float %a) {
%c1 = fcmp oge float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -112,7 +112,7 @@ define amdgpu_gs void @oge(float %a) {
; GCN-LABEL: {{^}}olt:
; GCN: v_cmp_ngt_f32
-define amdgpu_gs void @olt(float %a) {
+define amdgpu_ps void @olt(float %a) {
%c1 = fcmp olt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -121,7 +121,7 @@ define amdgpu_gs void @olt(float %a) {
; GCN-LABEL: {{^}}ole:
; GCN: v_cmp_nge_f32
-define amdgpu_gs void @ole(float %a) {
+define amdgpu_ps void @ole(float %a) {
%c1 = fcmp ole float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -130,7 +130,7 @@ define amdgpu_gs void @ole(float %a) {
; GCN-LABEL: {{^}}one:
; GCN: v_cmp_nlg_f32
-define amdgpu_gs void @one(float %a) {
+define amdgpu_ps void @one(float %a) {
%c1 = fcmp one float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -139,7 +139,7 @@ define amdgpu_gs void @one(float %a) {
; GCN-LABEL: {{^}}ord:
; GCN: v_cmp_o_f32
-define amdgpu_gs void @ord(float %a) {
+define amdgpu_ps void @ord(float %a) {
%c1 = fcmp ord float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -148,7 +148,7 @@ define amdgpu_gs void @ord(float %a) {
; GCN-LABEL: {{^}}uno:
; GCN: v_cmp_u_f32
-define amdgpu_gs void @uno(float %a) {
+define amdgpu_ps void @uno(float %a) {
%c1 = fcmp uno float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -157,7 +157,7 @@ define amdgpu_gs void @uno(float %a) {
; GCN-LABEL: {{^}}ueq:
; GCN: v_cmp_lg_f32
-define amdgpu_gs void @ueq(float %a) {
+define amdgpu_ps void @ueq(float %a) {
%c1 = fcmp ueq float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -166,7 +166,7 @@ define amdgpu_gs void @ueq(float %a) {
; GCN-LABEL: {{^}}ugt:
; GCN: v_cmp_ge_f32
-define amdgpu_gs void @ugt(float %a) {
+define amdgpu_ps void @ugt(float %a) {
%c1 = fcmp ugt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -175,7 +175,7 @@ define amdgpu_gs void @ugt(float %a) {
; GCN-LABEL: {{^}}uge:
; GCN: v_cmp_gt_f32_e32 vcc, -1.0
-define amdgpu_gs void @uge(float %a) {
+define amdgpu_ps void @uge(float %a) {
%c1 = fcmp uge float %a, -1.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -184,7 +184,7 @@ define amdgpu_gs void @uge(float %a) {
; GCN-LABEL: {{^}}ult:
; GCN: v_cmp_le_f32_e32 vcc, -2.0
-define amdgpu_gs void @ult(float %a) {
+define amdgpu_ps void @ult(float %a) {
%c1 = fcmp ult float %a, -2.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -193,7 +193,7 @@ define amdgpu_gs void @ult(float %a) {
; GCN-LABEL: {{^}}ule:
; GCN: v_cmp_lt_f32_e32 vcc, 2.0
-define amdgpu_gs void @ule(float %a) {
+define amdgpu_ps void @ule(float %a) {
%c1 = fcmp ule float %a, 2.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -202,7 +202,7 @@ define amdgpu_gs void @ule(float %a) {
; GCN-LABEL: {{^}}une:
; GCN: v_cmp_eq_f32_e32 vcc, 0
-define amdgpu_gs void @une(float %a) {
+define amdgpu_ps void @une(float %a) {
%c1 = fcmp une float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -211,7 +211,7 @@ define amdgpu_gs void @une(float %a) {
; GCN-LABEL: {{^}}neg_olt:
; GCN: v_cmp_gt_f32_e32 vcc, 1.0
-define amdgpu_gs void @neg_olt(float %a) {
+define amdgpu_ps void @neg_olt(float %a) {
%c1 = fcmp olt float %a, 1.0
%c2 = xor i1 %c1, 1
call void @llvm.amdgcn.kill(i1 %c2)
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 097154ed23ede..50f1a3ae44f63 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1760,7 +1760,7 @@ define amdgpu_ps void @test_kill_i1_terminator_float() #0 {
ret void
}
-define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
+define amdgpu_ps void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
; GFX1032-LABEL: test_kill_i1_terminator_i1:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v1
@@ -1769,12 +1769,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1032-NEXT: s_or_b32 s0, vcc_lo, s0
; GFX1032-NEXT: s_andn2_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_andn2_b32 s1, s1, s0
+; GFX1032-NEXT: s_cbranch_scc0 .LBB32_2
+; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: exp mrt0 off, off, off, off
; GFX1032-NEXT: s_endpgm
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: .LBB32_2:
; GFX1032-NEXT: s_mov_b32 exec_lo, 0
+; GFX1032-NEXT: exp null off, off, off, off done vm
; GFX1032-NEXT: s_endpgm
;
; GFX1064-LABEL: test_kill_i1_terminator_i1:
@@ -1785,12 +1788,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1064-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX1064-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
+; GFX1064-NEXT: s_cbranch_scc0 .LBB32_2
+; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: exp mrt0 off, off, off, off
; GFX1064-NEXT: s_endpgm
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: .LBB32_2:
; GFX1064-NEXT: s_mov_b64 exec, 0
+; GFX1064-NEXT: exp null off, off, off, off done vm
; GFX1064-NEXT: s_endpgm
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 077da9cda6523..95fc24573c058 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2727,7 +2727,7 @@ main_body:
declare void @llvm.amdgcn.kill(i1)
-define void @kill_true() {
+define amdgpu_ps void @kill_true() {
; CHECK-LABEL: @kill_true(
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Verifier/amdgpu-intrinsic.ll b/llvm/test/Verifier/amdgpu-intrinsic.ll
new file mode 100644
index 0000000000000..7745e0198b36d
--- /dev/null
+++ b/llvm/test/Verifier/amdgpu-intrinsic.ll
@@ -0,0 +1,34 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+target datalayout = "A5"
+
+declare void @llvm.amdgcn.kill(i1)
+declare void @llvm.amdgcn.wqm.demote(i1)
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true)
+define amdgpu_cs void @cs_kill() {
+ call void @llvm.amdgcn.kill(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true)
+define amdgpu_gs void @gs_kill() {
+ call void @llvm.amdgcn.kill(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true)
+define amdgpu_cs void @cs_wqm_demote() {
+ call void @llvm.amdgcn.wqm.demote(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true)
+define amdgpu_gs void @gs_wqm_demote() {
+ call void @llvm.amdgcn.wqm.demote(i1 true)
+ ret void
+}
|
@llvm/pr-subscribers-llvm-transforms Author: Carl Ritson (perlfu) ChangesOnly amdgpu_ps shaders should be calling llvm.amdgcn.kill and llvm.amdgcn.wqm.demote. Enforce this through the verifier. Full diff: https://github.com/llvm/llvm-project/pull/151922.diff 6 Files Affected:
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3ff9895e161c4..90c60b5d64841 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6714,6 +6714,13 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"invalid vector type for format", &Call, Src0, Call.getArgOperand(0));
Check(Src1Ty->getNumElements() >= getFormatNumRegs(FmtB),
"invalid vector type for format", &Call, Src1, Call.getArgOperand(2));
+ }
+ case Intrinsic::amdgcn_kill:
+ case Intrinsic::amdgcn_wqm_demote: {
+ Check(Call.getCaller()->getCallingConv() == CallingConv::AMDGPU_PS,
+ "Intrinsic can only be used from functions with the amdgpu_ps"
+ " calling convention ",
+ &Call);
break;
}
case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
diff --git a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
index b63fff38f34f6..8fb7c5daf081e 100644
--- a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -145,10 +145,10 @@ define amdgpu_kernel void @test_flush_f64_outputs(ptr addrspace(1) %out0, ptr ad
ret void
}
-; GCN-LABEL: {{^}}kill_gs_const:
+; GCN-LABEL: {{^}}kill_ps_const:
; GCN: FloatMode: 240
; GCN: IeeeMode: 0
-define amdgpu_gs void @kill_gs_const() {
+define amdgpu_ps void @kill_ps_const() {
main_body:
%cmp0 = icmp ule i32 0, 3
call void @llvm.amdgcn.kill(i1 %cmp0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
index 462090c6e89df..6d85cdceec4c6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
@@ -3,10 +3,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
-; GCN-LABEL: {{^}}gs_const:
+; GCN-LABEL: {{^}}ps_const:
; GCN-NOT: v_cmpx
; GCN: s_mov_b64 exec, 0
-define amdgpu_gs void @gs_const() {
+define amdgpu_ps void @ps_const() {
%tmp = icmp ule i32 0, 3
%tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
%c1 = fcmp oge float %tmp1, 0.0
@@ -37,7 +37,7 @@ define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) {
; GCN-LABEL: {{^}}true:
; GCN-NEXT: %bb.
; GCN-NEXT: s_endpgm
-define amdgpu_gs void @true() {
+define amdgpu_ps void @true() {
call void @llvm.amdgcn.kill(i1 true)
ret void
}
@@ -45,7 +45,7 @@ define amdgpu_gs void @true() {
; GCN-LABEL: {{^}}false:
; GCN-NOT: v_cmpx
; GCN: s_mov_b64 exec, 0
-define amdgpu_gs void @false() {
+define amdgpu_ps void @false() {
call void @llvm.amdgcn.kill(i1 false)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
ret void
@@ -58,7 +58,7 @@ define amdgpu_gs void @false() {
; GCN: s_and{{n2|_not1}}_b64 s[0:1], exec, s[0:1]
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
; GCN: s_and_b64 exec, exec, s[2:3]
-define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_ps void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
%x = or i1 %c1, %c2
@@ -73,7 +73,7 @@ define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
; GCN: s_xor_b64 s[0:1]
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
; GCN: s_and_b64 exec, exec, s[2:3]
-define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_ps void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
%x = xor i1 %c1, %c2
@@ -85,7 +85,7 @@ define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
; GCN-LABEL: {{^}}oeq:
; GCN: v_cmp_neq_f32
-define amdgpu_gs void @oeq(float %a) {
+define amdgpu_ps void @oeq(float %a) {
%c1 = fcmp oeq float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -94,7 +94,7 @@ define amdgpu_gs void @oeq(float %a) {
; GCN-LABEL: {{^}}ogt:
; GCN: v_cmp_nlt_f32
-define amdgpu_gs void @ogt(float %a) {
+define amdgpu_ps void @ogt(float %a) {
%c1 = fcmp ogt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -103,7 +103,7 @@ define amdgpu_gs void @ogt(float %a) {
; GCN-LABEL: {{^}}oge:
; GCN: v_cmp_nle_f32
-define amdgpu_gs void @oge(float %a) {
+define amdgpu_ps void @oge(float %a) {
%c1 = fcmp oge float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -112,7 +112,7 @@ define amdgpu_gs void @oge(float %a) {
; GCN-LABEL: {{^}}olt:
; GCN: v_cmp_ngt_f32
-define amdgpu_gs void @olt(float %a) {
+define amdgpu_ps void @olt(float %a) {
%c1 = fcmp olt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -121,7 +121,7 @@ define amdgpu_gs void @olt(float %a) {
; GCN-LABEL: {{^}}ole:
; GCN: v_cmp_nge_f32
-define amdgpu_gs void @ole(float %a) {
+define amdgpu_ps void @ole(float %a) {
%c1 = fcmp ole float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -130,7 +130,7 @@ define amdgpu_gs void @ole(float %a) {
; GCN-LABEL: {{^}}one:
; GCN: v_cmp_nlg_f32
-define amdgpu_gs void @one(float %a) {
+define amdgpu_ps void @one(float %a) {
%c1 = fcmp one float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -139,7 +139,7 @@ define amdgpu_gs void @one(float %a) {
; GCN-LABEL: {{^}}ord:
; GCN: v_cmp_o_f32
-define amdgpu_gs void @ord(float %a) {
+define amdgpu_ps void @ord(float %a) {
%c1 = fcmp ord float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -148,7 +148,7 @@ define amdgpu_gs void @ord(float %a) {
; GCN-LABEL: {{^}}uno:
; GCN: v_cmp_u_f32
-define amdgpu_gs void @uno(float %a) {
+define amdgpu_ps void @uno(float %a) {
%c1 = fcmp uno float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -157,7 +157,7 @@ define amdgpu_gs void @uno(float %a) {
; GCN-LABEL: {{^}}ueq:
; GCN: v_cmp_lg_f32
-define amdgpu_gs void @ueq(float %a) {
+define amdgpu_ps void @ueq(float %a) {
%c1 = fcmp ueq float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -166,7 +166,7 @@ define amdgpu_gs void @ueq(float %a) {
; GCN-LABEL: {{^}}ugt:
; GCN: v_cmp_ge_f32
-define amdgpu_gs void @ugt(float %a) {
+define amdgpu_ps void @ugt(float %a) {
%c1 = fcmp ugt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -175,7 +175,7 @@ define amdgpu_gs void @ugt(float %a) {
; GCN-LABEL: {{^}}uge:
; GCN: v_cmp_gt_f32_e32 vcc, -1.0
-define amdgpu_gs void @uge(float %a) {
+define amdgpu_ps void @uge(float %a) {
%c1 = fcmp uge float %a, -1.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -184,7 +184,7 @@ define amdgpu_gs void @uge(float %a) {
; GCN-LABEL: {{^}}ult:
; GCN: v_cmp_le_f32_e32 vcc, -2.0
-define amdgpu_gs void @ult(float %a) {
+define amdgpu_ps void @ult(float %a) {
%c1 = fcmp ult float %a, -2.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -193,7 +193,7 @@ define amdgpu_gs void @ult(float %a) {
; GCN-LABEL: {{^}}ule:
; GCN: v_cmp_lt_f32_e32 vcc, 2.0
-define amdgpu_gs void @ule(float %a) {
+define amdgpu_ps void @ule(float %a) {
%c1 = fcmp ule float %a, 2.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -202,7 +202,7 @@ define amdgpu_gs void @ule(float %a) {
; GCN-LABEL: {{^}}une:
; GCN: v_cmp_eq_f32_e32 vcc, 0
-define amdgpu_gs void @une(float %a) {
+define amdgpu_ps void @une(float %a) {
%c1 = fcmp une float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -211,7 +211,7 @@ define amdgpu_gs void @une(float %a) {
; GCN-LABEL: {{^}}neg_olt:
; GCN: v_cmp_gt_f32_e32 vcc, 1.0
-define amdgpu_gs void @neg_olt(float %a) {
+define amdgpu_ps void @neg_olt(float %a) {
%c1 = fcmp olt float %a, 1.0
%c2 = xor i1 %c1, 1
call void @llvm.amdgcn.kill(i1 %c2)
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 097154ed23ede..50f1a3ae44f63 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1760,7 +1760,7 @@ define amdgpu_ps void @test_kill_i1_terminator_float() #0 {
ret void
}
-define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
+define amdgpu_ps void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
; GFX1032-LABEL: test_kill_i1_terminator_i1:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v1
@@ -1769,12 +1769,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1032-NEXT: s_or_b32 s0, vcc_lo, s0
; GFX1032-NEXT: s_andn2_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_andn2_b32 s1, s1, s0
+; GFX1032-NEXT: s_cbranch_scc0 .LBB32_2
+; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: exp mrt0 off, off, off, off
; GFX1032-NEXT: s_endpgm
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: .LBB32_2:
; GFX1032-NEXT: s_mov_b32 exec_lo, 0
+; GFX1032-NEXT: exp null off, off, off, off done vm
; GFX1032-NEXT: s_endpgm
;
; GFX1064-LABEL: test_kill_i1_terminator_i1:
@@ -1785,12 +1788,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1064-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX1064-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
+; GFX1064-NEXT: s_cbranch_scc0 .LBB32_2
+; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: exp mrt0 off, off, off, off
; GFX1064-NEXT: s_endpgm
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: .LBB32_2:
; GFX1064-NEXT: s_mov_b64 exec, 0
+; GFX1064-NEXT: exp null off, off, off, off done vm
; GFX1064-NEXT: s_endpgm
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 077da9cda6523..95fc24573c058 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2727,7 +2727,7 @@ main_body:
declare void @llvm.amdgcn.kill(i1)
-define void @kill_true() {
+define amdgpu_ps void @kill_true() {
; CHECK-LABEL: @kill_true(
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Verifier/amdgpu-intrinsic.ll b/llvm/test/Verifier/amdgpu-intrinsic.ll
new file mode 100644
index 0000000000000..7745e0198b36d
--- /dev/null
+++ b/llvm/test/Verifier/amdgpu-intrinsic.ll
@@ -0,0 +1,34 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+target datalayout = "A5"
+
+declare void @llvm.amdgcn.kill(i1)
+declare void @llvm.amdgcn.wqm.demote(i1)
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true)
+define amdgpu_cs void @cs_kill() {
+ call void @llvm.amdgcn.kill(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true)
+define amdgpu_gs void @gs_kill() {
+ call void @llvm.amdgcn.kill(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true)
+define amdgpu_cs void @cs_wqm_demote() {
+ call void @llvm.amdgcn.wqm.demote(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true)
+define amdgpu_gs void @gs_wqm_demote() {
+ call void @llvm.amdgcn.wqm.demote(i1 true)
+ ret void
+}
|
@llvm/pr-subscribers-backend-amdgpu Author: Carl Ritson (perlfu) ChangesOnly amdgpu_ps shaders should be calling llvm.amdgcn.kill and llvm.amdgcn.wqm.demote. Enforce this through the verifier. Full diff: https://github.com/llvm/llvm-project/pull/151922.diff 6 Files Affected:
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3ff9895e161c4..90c60b5d64841 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6714,6 +6714,13 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"invalid vector type for format", &Call, Src0, Call.getArgOperand(0));
Check(Src1Ty->getNumElements() >= getFormatNumRegs(FmtB),
"invalid vector type for format", &Call, Src1, Call.getArgOperand(2));
+ }
+ case Intrinsic::amdgcn_kill:
+ case Intrinsic::amdgcn_wqm_demote: {
+ Check(Call.getCaller()->getCallingConv() == CallingConv::AMDGPU_PS,
+ "Intrinsic can only be used from functions with the amdgpu_ps"
+ " calling convention ",
+ &Call);
break;
}
case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
diff --git a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
index b63fff38f34f6..8fb7c5daf081e 100644
--- a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -145,10 +145,10 @@ define amdgpu_kernel void @test_flush_f64_outputs(ptr addrspace(1) %out0, ptr ad
ret void
}
-; GCN-LABEL: {{^}}kill_gs_const:
+; GCN-LABEL: {{^}}kill_ps_const:
; GCN: FloatMode: 240
; GCN: IeeeMode: 0
-define amdgpu_gs void @kill_gs_const() {
+define amdgpu_ps void @kill_ps_const() {
main_body:
%cmp0 = icmp ule i32 0, 3
call void @llvm.amdgcn.kill(i1 %cmp0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
index 462090c6e89df..6d85cdceec4c6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
@@ -3,10 +3,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
-; GCN-LABEL: {{^}}gs_const:
+; GCN-LABEL: {{^}}ps_const:
; GCN-NOT: v_cmpx
; GCN: s_mov_b64 exec, 0
-define amdgpu_gs void @gs_const() {
+define amdgpu_ps void @ps_const() {
%tmp = icmp ule i32 0, 3
%tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
%c1 = fcmp oge float %tmp1, 0.0
@@ -37,7 +37,7 @@ define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) {
; GCN-LABEL: {{^}}true:
; GCN-NEXT: %bb.
; GCN-NEXT: s_endpgm
-define amdgpu_gs void @true() {
+define amdgpu_ps void @true() {
call void @llvm.amdgcn.kill(i1 true)
ret void
}
@@ -45,7 +45,7 @@ define amdgpu_gs void @true() {
; GCN-LABEL: {{^}}false:
; GCN-NOT: v_cmpx
; GCN: s_mov_b64 exec, 0
-define amdgpu_gs void @false() {
+define amdgpu_ps void @false() {
call void @llvm.amdgcn.kill(i1 false)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
ret void
@@ -58,7 +58,7 @@ define amdgpu_gs void @false() {
; GCN: s_and{{n2|_not1}}_b64 s[0:1], exec, s[0:1]
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
; GCN: s_and_b64 exec, exec, s[2:3]
-define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_ps void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
%x = or i1 %c1, %c2
@@ -73,7 +73,7 @@ define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
; GCN: s_xor_b64 s[0:1]
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
; GCN: s_and_b64 exec, exec, s[2:3]
-define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_ps void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
%x = xor i1 %c1, %c2
@@ -85,7 +85,7 @@ define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
; GCN-LABEL: {{^}}oeq:
; GCN: v_cmp_neq_f32
-define amdgpu_gs void @oeq(float %a) {
+define amdgpu_ps void @oeq(float %a) {
%c1 = fcmp oeq float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -94,7 +94,7 @@ define amdgpu_gs void @oeq(float %a) {
; GCN-LABEL: {{^}}ogt:
; GCN: v_cmp_nlt_f32
-define amdgpu_gs void @ogt(float %a) {
+define amdgpu_ps void @ogt(float %a) {
%c1 = fcmp ogt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -103,7 +103,7 @@ define amdgpu_gs void @ogt(float %a) {
; GCN-LABEL: {{^}}oge:
; GCN: v_cmp_nle_f32
-define amdgpu_gs void @oge(float %a) {
+define amdgpu_ps void @oge(float %a) {
%c1 = fcmp oge float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -112,7 +112,7 @@ define amdgpu_gs void @oge(float %a) {
; GCN-LABEL: {{^}}olt:
; GCN: v_cmp_ngt_f32
-define amdgpu_gs void @olt(float %a) {
+define amdgpu_ps void @olt(float %a) {
%c1 = fcmp olt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -121,7 +121,7 @@ define amdgpu_gs void @olt(float %a) {
; GCN-LABEL: {{^}}ole:
; GCN: v_cmp_nge_f32
-define amdgpu_gs void @ole(float %a) {
+define amdgpu_ps void @ole(float %a) {
%c1 = fcmp ole float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -130,7 +130,7 @@ define amdgpu_gs void @ole(float %a) {
; GCN-LABEL: {{^}}one:
; GCN: v_cmp_nlg_f32
-define amdgpu_gs void @one(float %a) {
+define amdgpu_ps void @one(float %a) {
%c1 = fcmp one float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -139,7 +139,7 @@ define amdgpu_gs void @one(float %a) {
; GCN-LABEL: {{^}}ord:
; GCN: v_cmp_o_f32
-define amdgpu_gs void @ord(float %a) {
+define amdgpu_ps void @ord(float %a) {
%c1 = fcmp ord float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -148,7 +148,7 @@ define amdgpu_gs void @ord(float %a) {
; GCN-LABEL: {{^}}uno:
; GCN: v_cmp_u_f32
-define amdgpu_gs void @uno(float %a) {
+define amdgpu_ps void @uno(float %a) {
%c1 = fcmp uno float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -157,7 +157,7 @@ define amdgpu_gs void @uno(float %a) {
; GCN-LABEL: {{^}}ueq:
; GCN: v_cmp_lg_f32
-define amdgpu_gs void @ueq(float %a) {
+define amdgpu_ps void @ueq(float %a) {
%c1 = fcmp ueq float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -166,7 +166,7 @@ define amdgpu_gs void @ueq(float %a) {
; GCN-LABEL: {{^}}ugt:
; GCN: v_cmp_ge_f32
-define amdgpu_gs void @ugt(float %a) {
+define amdgpu_ps void @ugt(float %a) {
%c1 = fcmp ugt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -175,7 +175,7 @@ define amdgpu_gs void @ugt(float %a) {
; GCN-LABEL: {{^}}uge:
; GCN: v_cmp_gt_f32_e32 vcc, -1.0
-define amdgpu_gs void @uge(float %a) {
+define amdgpu_ps void @uge(float %a) {
%c1 = fcmp uge float %a, -1.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -184,7 +184,7 @@ define amdgpu_gs void @uge(float %a) {
; GCN-LABEL: {{^}}ult:
; GCN: v_cmp_le_f32_e32 vcc, -2.0
-define amdgpu_gs void @ult(float %a) {
+define amdgpu_ps void @ult(float %a) {
%c1 = fcmp ult float %a, -2.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -193,7 +193,7 @@ define amdgpu_gs void @ult(float %a) {
; GCN-LABEL: {{^}}ule:
; GCN: v_cmp_lt_f32_e32 vcc, 2.0
-define amdgpu_gs void @ule(float %a) {
+define amdgpu_ps void @ule(float %a) {
%c1 = fcmp ule float %a, 2.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -202,7 +202,7 @@ define amdgpu_gs void @ule(float %a) {
; GCN-LABEL: {{^}}une:
; GCN: v_cmp_eq_f32_e32 vcc, 0
-define amdgpu_gs void @une(float %a) {
+define amdgpu_ps void @une(float %a) {
%c1 = fcmp une float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -211,7 +211,7 @@ define amdgpu_gs void @une(float %a) {
; GCN-LABEL: {{^}}neg_olt:
; GCN: v_cmp_gt_f32_e32 vcc, 1.0
-define amdgpu_gs void @neg_olt(float %a) {
+define amdgpu_ps void @neg_olt(float %a) {
%c1 = fcmp olt float %a, 1.0
%c2 = xor i1 %c1, 1
call void @llvm.amdgcn.kill(i1 %c2)
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 097154ed23ede..50f1a3ae44f63 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1760,7 +1760,7 @@ define amdgpu_ps void @test_kill_i1_terminator_float() #0 {
ret void
}
-define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
+define amdgpu_ps void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
; GFX1032-LABEL: test_kill_i1_terminator_i1:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v1
@@ -1769,12 +1769,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1032-NEXT: s_or_b32 s0, vcc_lo, s0
; GFX1032-NEXT: s_andn2_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_andn2_b32 s1, s1, s0
+; GFX1032-NEXT: s_cbranch_scc0 .LBB32_2
+; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: exp mrt0 off, off, off, off
; GFX1032-NEXT: s_endpgm
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: .LBB32_2:
; GFX1032-NEXT: s_mov_b32 exec_lo, 0
+; GFX1032-NEXT: exp null off, off, off, off done vm
; GFX1032-NEXT: s_endpgm
;
; GFX1064-LABEL: test_kill_i1_terminator_i1:
@@ -1785,12 +1788,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1064-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX1064-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
+; GFX1064-NEXT: s_cbranch_scc0 .LBB32_2
+; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: exp mrt0 off, off, off, off
; GFX1064-NEXT: s_endpgm
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: .LBB32_2:
; GFX1064-NEXT: s_mov_b64 exec, 0
+; GFX1064-NEXT: exp null off, off, off, off done vm
; GFX1064-NEXT: s_endpgm
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 077da9cda6523..95fc24573c058 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2727,7 +2727,7 @@ main_body:
declare void @llvm.amdgcn.kill(i1)
-define void @kill_true() {
+define amdgpu_ps void @kill_true() {
; CHECK-LABEL: @kill_true(
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Verifier/amdgpu-intrinsic.ll b/llvm/test/Verifier/amdgpu-intrinsic.ll
new file mode 100644
index 0000000000000..7745e0198b36d
--- /dev/null
+++ b/llvm/test/Verifier/amdgpu-intrinsic.ll
@@ -0,0 +1,34 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+target datalayout = "A5"
+
+declare void @llvm.amdgcn.kill(i1)
+declare void @llvm.amdgcn.wqm.demote(i1)
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true)
+define amdgpu_cs void @cs_kill() {
+ call void @llvm.amdgcn.kill(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true)
+define amdgpu_gs void @gs_kill() {
+ call void @llvm.amdgcn.kill(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true)
+define amdgpu_cs void @cs_wqm_demote() {
+ call void @llvm.amdgcn.wqm.demote(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true)
+define amdgpu_gs void @gs_wqm_demote() {
+ call void @llvm.amdgcn.wqm.demote(i1 true)
+ ret void
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should be generalizing kill support. In particular this should work in callable functions. Restricting to a specific entry point is bad
Is there a use case for generalizing it? |
Only amdgpu_ps shaders should be calling llvm.amdgcn.kill and llvm.amdgcn.wqm.demote. Enforce this through the verifier.