From c33c756a991af63089182f419ed9c51f4ec9ef1e Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Mon, 4 Aug 2025 17:15:35 +0900 Subject: [PATCH] [AMDGPU][Verifier] Limit kill/wqm.demote intrinsics to PS shaders Only amdgpu_ps shaders should be calling llvm.amdgcn.kill and llvm.amdgcn.wqm.demote. Enforce this through the verifier. --- llvm/lib/IR/Verifier.cpp | 9 ++++ llvm/test/CodeGen/AMDGPU/default-fp-mode.ll | 4 +- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll | 42 +++++++++---------- llvm/test/CodeGen/AMDGPU/wave32.ll | 12 ++++-- .../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 2 +- llvm/test/Verifier/amdgpu-intrinsic.ll | 34 +++++++++++++++ 6 files changed, 76 insertions(+), 27 deletions(-) create mode 100644 llvm/test/Verifier/amdgpu-intrinsic.ll diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 3ff9895e161c4..90c60b5d64841 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6714,6 +6714,15 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "invalid vector type for format", &Call, Src0, Call.getArgOperand(0)); Check(Src1Ty->getNumElements() >= getFormatNumRegs(FmtB), "invalid vector type for format", &Call, Src1, Call.getArgOperand(2)); + break; + } + case Intrinsic::amdgcn_kill: + case Intrinsic::amdgcn_wqm_demote: { + // Only amdgpu_ps shaders may use these intrinsics. + Check(Call.getCaller()->getCallingConv() == CallingConv::AMDGPU_PS, + "Intrinsic can only be used from functions with the amdgpu_ps" + " calling convention", + &Call); break; } case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32: diff --git a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll index b63fff38f34f6..8fb7c5daf081e 100644 --- a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll +++ b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll @@ -145,10 +145,10 @@ define amdgpu_kernel void @test_flush_f64_outputs(ptr addrspace(1) %out0, ptr ad ret void } -; GCN-LABEL: {{^}}kill_gs_const: +; GCN-LABEL: {{^}}kill_ps_const: ; GCN: FloatMode: 240 ; GCN: IeeeMode: 0 
-define amdgpu_gs void @kill_gs_const() { +define amdgpu_ps void @kill_ps_const() { main_body: %cmp0 = icmp ule i32 0, 3 call void @llvm.amdgcn.kill(i1 %cmp0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll index 462090c6e89df..6d85cdceec4c6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll @@ -3,10 +3,10 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s -; GCN-LABEL: {{^}}gs_const: +; GCN-LABEL: {{^}}ps_const: ; GCN-NOT: v_cmpx ; GCN: s_mov_b64 exec, 0 -define amdgpu_gs void @gs_const() { +define amdgpu_ps void @ps_const() { %tmp = icmp ule i32 0, 3 %tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00 %c1 = fcmp oge float %tmp1, 0.0 @@ -37,7 +37,7 @@ define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) { ; GCN-LABEL: {{^}}true: ; GCN-NEXT: %bb. 
; GCN-NEXT: s_endpgm -define amdgpu_gs void @true() { +define amdgpu_ps void @true() { call void @llvm.amdgcn.kill(i1 true) ret void } @@ -45,7 +45,7 @@ define amdgpu_gs void @true() { ; GCN-LABEL: {{^}}false: ; GCN-NOT: v_cmpx ; GCN: s_mov_b64 exec, 0 -define amdgpu_gs void @false() { +define amdgpu_ps void @false() { call void @llvm.amdgcn.kill(i1 false) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) ret void @@ -58,7 +58,7 @@ define amdgpu_gs void @false() { ; GCN: s_and{{n2|_not1}}_b64 s[0:1], exec, s[0:1] ; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1] ; GCN: s_and_b64 exec, exec, s[2:3] -define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) { +define amdgpu_ps void @and(i32 %a, i32 %b, i32 %c, i32 %d) { %c1 = icmp slt i32 %a, %b %c2 = icmp slt i32 %c, %d %x = or i1 %c1, %c2 @@ -73,7 +73,7 @@ define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) { ; GCN: s_xor_b64 s[0:1] ; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1] ; GCN: s_and_b64 exec, exec, s[2:3] -define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) { +define amdgpu_ps void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) { %c1 = icmp slt i32 %a, %b %c2 = icmp slt i32 %c, %d %x = xor i1 %c1, %c2 @@ -85,7 +85,7 @@ define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) { ; GCN-LABEL: {{^}}oeq: ; GCN: v_cmp_neq_f32 -define amdgpu_gs void @oeq(float %a) { +define amdgpu_ps void @oeq(float %a) { %c1 = fcmp oeq float %a, 0.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -94,7 +94,7 @@ define amdgpu_gs void @oeq(float %a) { ; GCN-LABEL: {{^}}ogt: ; GCN: v_cmp_nlt_f32 -define amdgpu_gs void @ogt(float %a) { +define amdgpu_ps void @ogt(float %a) { %c1 = fcmp ogt float %a, 0.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -103,7 +103,7 @@ define amdgpu_gs void @ogt(float %a) { ; GCN-LABEL: {{^}}oge: ; GCN: v_cmp_nle_f32 -define amdgpu_gs void @oge(float %a) { +define amdgpu_ps void @oge(float %a) { %c1 = fcmp oge 
float %a, 0.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -112,7 +112,7 @@ define amdgpu_gs void @oge(float %a) { ; GCN-LABEL: {{^}}olt: ; GCN: v_cmp_ngt_f32 -define amdgpu_gs void @olt(float %a) { +define amdgpu_ps void @olt(float %a) { %c1 = fcmp olt float %a, 0.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -121,7 +121,7 @@ define amdgpu_gs void @olt(float %a) { ; GCN-LABEL: {{^}}ole: ; GCN: v_cmp_nge_f32 -define amdgpu_gs void @ole(float %a) { +define amdgpu_ps void @ole(float %a) { %c1 = fcmp ole float %a, 0.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -130,7 +130,7 @@ define amdgpu_gs void @ole(float %a) { ; GCN-LABEL: {{^}}one: ; GCN: v_cmp_nlg_f32 -define amdgpu_gs void @one(float %a) { +define amdgpu_ps void @one(float %a) { %c1 = fcmp one float %a, 0.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -139,7 +139,7 @@ define amdgpu_gs void @one(float %a) { ; GCN-LABEL: {{^}}ord: ; GCN: v_cmp_o_f32 -define amdgpu_gs void @ord(float %a) { +define amdgpu_ps void @ord(float %a) { %c1 = fcmp ord float %a, 0.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -148,7 +148,7 @@ define amdgpu_gs void @ord(float %a) { ; GCN-LABEL: {{^}}uno: ; GCN: v_cmp_u_f32 -define amdgpu_gs void @uno(float %a) { +define amdgpu_ps void @uno(float %a) { %c1 = fcmp uno float %a, 0.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -157,7 +157,7 @@ define amdgpu_gs void @uno(float %a) { ; GCN-LABEL: {{^}}ueq: ; GCN: v_cmp_lg_f32 -define amdgpu_gs void @ueq(float %a) { +define amdgpu_ps void @ueq(float %a) { %c1 = fcmp ueq float %a, 0.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -166,7 +166,7 @@ define amdgpu_gs void @ueq(float %a) { ; GCN-LABEL: {{^}}ugt: ; GCN: v_cmp_ge_f32 -define amdgpu_gs void @ugt(float 
%a) { +define amdgpu_ps void @ugt(float %a) { %c1 = fcmp ugt float %a, 0.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -175,7 +175,7 @@ define amdgpu_gs void @ugt(float %a) { ; GCN-LABEL: {{^}}uge: ; GCN: v_cmp_gt_f32_e32 vcc, -1.0 -define amdgpu_gs void @uge(float %a) { +define amdgpu_ps void @uge(float %a) { %c1 = fcmp uge float %a, -1.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -184,7 +184,7 @@ define amdgpu_gs void @uge(float %a) { ; GCN-LABEL: {{^}}ult: ; GCN: v_cmp_le_f32_e32 vcc, -2.0 -define amdgpu_gs void @ult(float %a) { +define amdgpu_ps void @ult(float %a) { %c1 = fcmp ult float %a, -2.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -193,7 +193,7 @@ define amdgpu_gs void @ult(float %a) { ; GCN-LABEL: {{^}}ule: ; GCN: v_cmp_lt_f32_e32 vcc, 2.0 -define amdgpu_gs void @ule(float %a) { +define amdgpu_ps void @ule(float %a) { %c1 = fcmp ule float %a, 2.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -202,7 +202,7 @@ define amdgpu_gs void @ule(float %a) { ; GCN-LABEL: {{^}}une: ; GCN: v_cmp_eq_f32_e32 vcc, 0 -define amdgpu_gs void @une(float %a) { +define amdgpu_ps void @une(float %a) { %c1 = fcmp une float %a, 0.0 call void @llvm.amdgcn.kill(i1 %c1) call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) @@ -211,7 +211,7 @@ define amdgpu_gs void @une(float %a) { ; GCN-LABEL: {{^}}neg_olt: ; GCN: v_cmp_gt_f32_e32 vcc, 1.0 -define amdgpu_gs void @neg_olt(float %a) { +define amdgpu_ps void @neg_olt(float %a) { %c1 = fcmp olt float %a, 1.0 %c2 = xor i1 %c1, 1 call void @llvm.amdgcn.kill(i1 %c2) diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 097154ed23ede..50f1a3ae44f63 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1760,7 +1760,7 @@ define amdgpu_ps void @test_kill_i1_terminator_float() #0 { ret void } -define amdgpu_gs 
void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 { +define amdgpu_ps void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 { ; GFX1032-LABEL: test_kill_i1_terminator_i1: ; GFX1032: ; %bb.0: ; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v1 @@ -1769,12 +1769,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d ; GFX1032-NEXT: s_or_b32 s0, vcc_lo, s0 ; GFX1032-NEXT: s_andn2_b32 s0, exec_lo, s0 ; GFX1032-NEXT: s_andn2_b32 s1, s1, s0 +; GFX1032-NEXT: s_cbranch_scc0 .LBB32_2 +; GFX1032-NEXT: ; %bb.1: ; GFX1032-NEXT: s_and_b32 exec_lo, exec_lo, s1 ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: exp mrt0 off, off, off, off ; GFX1032-NEXT: s_endpgm -; GFX1032-NEXT: ; %bb.1: +; GFX1032-NEXT: .LBB32_2: ; GFX1032-NEXT: s_mov_b32 exec_lo, 0 +; GFX1032-NEXT: exp null off, off, off, off done vm ; GFX1032-NEXT: s_endpgm ; ; GFX1064-LABEL: test_kill_i1_terminator_i1: @@ -1785,12 +1788,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d ; GFX1064-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX1064-NEXT: s_andn2_b64 s[0:1], exec, s[0:1] ; GFX1064-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1] +; GFX1064-NEXT: s_cbranch_scc0 .LBB32_2 +; GFX1064-NEXT: ; %bb.1: ; GFX1064-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: exp mrt0 off, off, off, off ; GFX1064-NEXT: s_endpgm -; GFX1064-NEXT: ; %bb.1: +; GFX1064-NEXT: .LBB32_2: ; GFX1064-NEXT: s_mov_b64 exec, 0 +; GFX1064-NEXT: exp null off, off, off, off done vm ; GFX1064-NEXT: s_endpgm %c1 = icmp slt i32 %a, %b %c2 = icmp slt i32 %c, %d diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index 077da9cda6523..95fc24573c058 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -2727,7 +2727,7 @@ main_body: declare void @llvm.amdgcn.kill(i1) 
-define void @kill_true() { +define amdgpu_ps void @kill_true() { ; CHECK-LABEL: @kill_true( ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Verifier/amdgpu-intrinsic.ll b/llvm/test/Verifier/amdgpu-intrinsic.ll new file mode 100644 index 0000000000000..7745e0198b36d --- /dev/null +++ b/llvm/test/Verifier/amdgpu-intrinsic.ll @@ -0,0 +1,34 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s + +target datalayout = "A5" + +declare void @llvm.amdgcn.kill(i1) +declare void @llvm.amdgcn.wqm.demote(i1) + +; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention +; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true) +define amdgpu_cs void @cs_kill() { + call void @llvm.amdgcn.kill(i1 true) + ret void +} + +; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention +; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true) +define amdgpu_gs void @gs_kill() { + call void @llvm.amdgcn.kill(i1 true) + ret void +} + +; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention +; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true) +define amdgpu_cs void @cs_wqm_demote() { + call void @llvm.amdgcn.wqm.demote(i1 true) + ret void +} + +; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention +; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true) +define amdgpu_gs void @gs_wqm_demote() { + call void @llvm.amdgcn.wqm.demote(i1 true) + ret void +}