Skip to content

[AMDGPU][Verifier] Limit kill/wqm.demote intrinsics to PS shaders #151922

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6714,6 +6714,13 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"invalid vector type for format", &Call, Src0, Call.getArgOperand(0));
Check(Src1Ty->getNumElements() >= getFormatNumRegs(FmtB),
"invalid vector type for format", &Call, Src1, Call.getArgOperand(2));
}
case Intrinsic::amdgcn_kill:
case Intrinsic::amdgcn_wqm_demote: {
Check(Call.getCaller()->getCallingConv() == CallingConv::AMDGPU_PS,
"Intrinsic can only be used from functions with the amdgpu_ps"
" calling convention ",
&Call);
break;
}
case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,10 @@ define amdgpu_kernel void @test_flush_f64_outputs(ptr addrspace(1) %out0, ptr ad
ret void
}

; GCN-LABEL: {{^}}kill_gs_const:
; GCN-LABEL: {{^}}kill_ps_const:
; GCN: FloatMode: 240
; GCN: IeeeMode: 0
define amdgpu_gs void @kill_gs_const() {
define amdgpu_ps void @kill_ps_const() {
main_body:
%cmp0 = icmp ule i32 0, 3
call void @llvm.amdgcn.kill(i1 %cmp0)
Expand Down
42 changes: 21 additions & 21 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s

; GCN-LABEL: {{^}}gs_const:
; GCN-LABEL: {{^}}ps_const:
; GCN-NOT: v_cmpx
; GCN: s_mov_b64 exec, 0
define amdgpu_gs void @gs_const() {
define amdgpu_ps void @ps_const() {
%tmp = icmp ule i32 0, 3
%tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
%c1 = fcmp oge float %tmp1, 0.0
Expand Down Expand Up @@ -37,15 +37,15 @@ define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) {
; GCN-LABEL: {{^}}true:
; GCN-NEXT: %bb.
; GCN-NEXT: s_endpgm
define amdgpu_gs void @true() {
define amdgpu_ps void @true() {
call void @llvm.amdgcn.kill(i1 true)
ret void
}

; GCN-LABEL: {{^}}false:
; GCN-NOT: v_cmpx
; GCN: s_mov_b64 exec, 0
define amdgpu_gs void @false() {
define amdgpu_ps void @false() {
call void @llvm.amdgcn.kill(i1 false)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
ret void
Expand All @@ -58,7 +58,7 @@ define amdgpu_gs void @false() {
; GCN: s_and{{n2|_not1}}_b64 s[0:1], exec, s[0:1]
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
; GCN: s_and_b64 exec, exec, s[2:3]
define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
define amdgpu_ps void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
%x = or i1 %c1, %c2
Expand All @@ -73,7 +73,7 @@ define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
; GCN: s_xor_b64 s[0:1]
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
; GCN: s_and_b64 exec, exec, s[2:3]
define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
define amdgpu_ps void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
%x = xor i1 %c1, %c2
Expand All @@ -85,7 +85,7 @@ define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {

; GCN-LABEL: {{^}}oeq:
; GCN: v_cmp_neq_f32
define amdgpu_gs void @oeq(float %a) {
define amdgpu_ps void @oeq(float %a) {
%c1 = fcmp oeq float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -94,7 +94,7 @@ define amdgpu_gs void @oeq(float %a) {

; GCN-LABEL: {{^}}ogt:
; GCN: v_cmp_nlt_f32
define amdgpu_gs void @ogt(float %a) {
define amdgpu_ps void @ogt(float %a) {
%c1 = fcmp ogt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -103,7 +103,7 @@ define amdgpu_gs void @ogt(float %a) {

; GCN-LABEL: {{^}}oge:
; GCN: v_cmp_nle_f32
define amdgpu_gs void @oge(float %a) {
define amdgpu_ps void @oge(float %a) {
%c1 = fcmp oge float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -112,7 +112,7 @@ define amdgpu_gs void @oge(float %a) {

; GCN-LABEL: {{^}}olt:
; GCN: v_cmp_ngt_f32
define amdgpu_gs void @olt(float %a) {
define amdgpu_ps void @olt(float %a) {
%c1 = fcmp olt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -121,7 +121,7 @@ define amdgpu_gs void @olt(float %a) {

; GCN-LABEL: {{^}}ole:
; GCN: v_cmp_nge_f32
define amdgpu_gs void @ole(float %a) {
define amdgpu_ps void @ole(float %a) {
%c1 = fcmp ole float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -130,7 +130,7 @@ define amdgpu_gs void @ole(float %a) {

; GCN-LABEL: {{^}}one:
; GCN: v_cmp_nlg_f32
define amdgpu_gs void @one(float %a) {
define amdgpu_ps void @one(float %a) {
%c1 = fcmp one float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -139,7 +139,7 @@ define amdgpu_gs void @one(float %a) {

; GCN-LABEL: {{^}}ord:
; GCN: v_cmp_o_f32
define amdgpu_gs void @ord(float %a) {
define amdgpu_ps void @ord(float %a) {
%c1 = fcmp ord float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -148,7 +148,7 @@ define amdgpu_gs void @ord(float %a) {

; GCN-LABEL: {{^}}uno:
; GCN: v_cmp_u_f32
define amdgpu_gs void @uno(float %a) {
define amdgpu_ps void @uno(float %a) {
%c1 = fcmp uno float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -157,7 +157,7 @@ define amdgpu_gs void @uno(float %a) {

; GCN-LABEL: {{^}}ueq:
; GCN: v_cmp_lg_f32
define amdgpu_gs void @ueq(float %a) {
define amdgpu_ps void @ueq(float %a) {
%c1 = fcmp ueq float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -166,7 +166,7 @@ define amdgpu_gs void @ueq(float %a) {

; GCN-LABEL: {{^}}ugt:
; GCN: v_cmp_ge_f32
define amdgpu_gs void @ugt(float %a) {
define amdgpu_ps void @ugt(float %a) {
%c1 = fcmp ugt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -175,7 +175,7 @@ define amdgpu_gs void @ugt(float %a) {

; GCN-LABEL: {{^}}uge:
; GCN: v_cmp_gt_f32_e32 vcc, -1.0
define amdgpu_gs void @uge(float %a) {
define amdgpu_ps void @uge(float %a) {
%c1 = fcmp uge float %a, -1.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -184,7 +184,7 @@ define amdgpu_gs void @uge(float %a) {

; GCN-LABEL: {{^}}ult:
; GCN: v_cmp_le_f32_e32 vcc, -2.0
define amdgpu_gs void @ult(float %a) {
define amdgpu_ps void @ult(float %a) {
%c1 = fcmp ult float %a, -2.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -193,7 +193,7 @@ define amdgpu_gs void @ult(float %a) {

; GCN-LABEL: {{^}}ule:
; GCN: v_cmp_lt_f32_e32 vcc, 2.0
define amdgpu_gs void @ule(float %a) {
define amdgpu_ps void @ule(float %a) {
%c1 = fcmp ule float %a, 2.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -202,7 +202,7 @@ define amdgpu_gs void @ule(float %a) {

; GCN-LABEL: {{^}}une:
; GCN: v_cmp_eq_f32_e32 vcc, 0
define amdgpu_gs void @une(float %a) {
define amdgpu_ps void @une(float %a) {
%c1 = fcmp une float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
Expand All @@ -211,7 +211,7 @@ define amdgpu_gs void @une(float %a) {

; GCN-LABEL: {{^}}neg_olt:
; GCN: v_cmp_gt_f32_e32 vcc, 1.0
define amdgpu_gs void @neg_olt(float %a) {
define amdgpu_ps void @neg_olt(float %a) {
%c1 = fcmp olt float %a, 1.0
%c2 = xor i1 %c1, 1
call void @llvm.amdgcn.kill(i1 %c2)
Expand Down
12 changes: 9 additions & 3 deletions llvm/test/CodeGen/AMDGPU/wave32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1760,7 +1760,7 @@ define amdgpu_ps void @test_kill_i1_terminator_float() #0 {
ret void
}

define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
define amdgpu_ps void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
; GFX1032-LABEL: test_kill_i1_terminator_i1:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v1
Expand All @@ -1769,12 +1769,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1032-NEXT: s_or_b32 s0, vcc_lo, s0
; GFX1032-NEXT: s_andn2_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_andn2_b32 s1, s1, s0
; GFX1032-NEXT: s_cbranch_scc0 .LBB32_2
; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: exp mrt0 off, off, off, off
; GFX1032-NEXT: s_endpgm
; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: .LBB32_2:
; GFX1032-NEXT: s_mov_b32 exec_lo, 0
; GFX1032-NEXT: exp null off, off, off, off done vm
; GFX1032-NEXT: s_endpgm
;
; GFX1064-LABEL: test_kill_i1_terminator_i1:
Expand All @@ -1785,12 +1788,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1064-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX1064-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; GFX1064-NEXT: s_cbranch_scc0 .LBB32_2
; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: exp mrt0 off, off, off, off
; GFX1064-NEXT: s_endpgm
; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: .LBB32_2:
; GFX1064-NEXT: s_mov_b64 exec, 0
; GFX1064-NEXT: exp null off, off, off, off done vm
; GFX1064-NEXT: s_endpgm
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2727,7 +2727,7 @@ main_body:

declare void @llvm.amdgcn.kill(i1)

define void @kill_true() {
define amdgpu_ps void @kill_true() {
; CHECK-LABEL: @kill_true(
; CHECK-NEXT: ret void
;
Expand Down
34 changes: 34 additions & 0 deletions llvm/test/Verifier/amdgpu-intrinsic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
; RUN: not llvm-as < %s 2>&1 | FileCheck %s

; Negative verifier test: llvm-as is expected to fail on this file (note the
; `not` in the run line), and FileCheck matches the diagnostics it prints.

target datalayout = "A5"

; Intrinsics under test. Each function below calls one of them with a constant
; true argument from a function that is not declared amdgpu_ps.
declare void @llvm.amdgcn.kill(i1)
declare void @llvm.amdgcn.wqm.demote(i1)

; llvm.amdgcn.kill called from an amdgpu_cs function: the diagnostic is
; expected, immediately followed by the offending call instruction.
; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true)
define amdgpu_cs void @cs_kill() {
call void @llvm.amdgcn.kill(i1 true)
ret void
}

; llvm.amdgcn.kill called from an amdgpu_gs function: the diagnostic is
; expected, immediately followed by the offending call instruction.
; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true)
define amdgpu_gs void @gs_kill() {
call void @llvm.amdgcn.kill(i1 true)
ret void
}

; llvm.amdgcn.wqm.demote called from an amdgpu_cs function: the diagnostic is
; expected, immediately followed by the offending call instruction.
; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true)
define amdgpu_cs void @cs_wqm_demote() {
call void @llvm.amdgcn.wqm.demote(i1 true)
ret void
}

; llvm.amdgcn.wqm.demote called from an amdgpu_gs function: the diagnostic is
; expected, immediately followed by the offending call instruction.
; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true)
define amdgpu_gs void @gs_wqm_demote() {
call void @llvm.amdgcn.wqm.demote(i1 true)
ret void
}
Loading