-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AMDGPU] Add V_ASHR_PK_I8_I32 and V_ASHR_PK_U8_I32 on gfx1250 #151389
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Add V_ASHR_PK_I8_I32 and V_ASHR_PK_U8_I32 on gfx1250 #151389
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) ChangesPatch is 23.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151389.diff 11 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 51f2c35a751ef..e571b6deee2b1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -2013,6 +2013,8 @@ let AssemblerPredicate = isGFX11Plus in {
// These instructions differ from GFX12 variant by supporting DPP:
defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>;
+defm V_ASHR_PK_I8_I32 : VOP3Only_Realtriple_gfx1250<0x290>;
+defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>;
defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>;
defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>;
diff --git a/llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll b/llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll
index aea2a8b3f0b78..f2ecfe8fc9a1f 100644
--- a/llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
; GFX950-LABEL: v_ashr_pk_i8_i32:
; GFX950: ; %bb.0:
@@ -13,6 +14,20 @@ define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i3
; GFX950-NEXT: v_ashr_pk_i8_i32 v1, s0, v1, v2
; GFX950-NEXT: global_store_short v0, v1, s[6:7]
; GFX950-NEXT: s_endpgm
+;
+; GFX1250-LABEL: v_ashr_pk_i8_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x2c
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_and_b32 s2, s2, 31
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_ashr_pk_i8_i32 v0, s0, s1, v0
+; GFX1250-NEXT: global_store_b16 v1, v0, s[4:5]
+; GFX1250-NEXT: s_endpgm
%insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0
%build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1
%src2.clamp = and i32 %src2, 31
@@ -40,6 +55,20 @@ define amdgpu_kernel void @v_ashr_pk_u8_i32(ptr addrspace(1) %out, i32 %src0, i3
; GFX950-NEXT: v_ashr_pk_u8_i32 v1, s0, v1, v2
; GFX950-NEXT: global_store_short v0, v1, s[6:7]
; GFX950-NEXT: s_endpgm
+;
+; GFX1250-LABEL: v_ashr_pk_u8_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x2c
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_and_b32 s2, s2, 31
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_ashr_pk_u8_i32 v0, s0, s1, v0
+; GFX1250-NEXT: global_store_b16 v1, v0, s[4:5]
+; GFX1250-NEXT: s_endpgm
%insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0
%build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1
%src2.clamp = and i32 %src2, 31
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
index 67a916d80529e..f3bf07e01250a 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -366,3 +366,39 @@ v_cvt_sr_pk_bf16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
// GFX1250: v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x6e,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_ashr_pk_i8_i32 v2, s4, v7, v8
+// GFX1250: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04]
+
+v_ashr_pk_i8_i32 v2, v4, 0, 1
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02]
+
+v_ashr_pk_i8_i32 v2, v4, 3, s2
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00]
+
+v_ashr_pk_i8_i32 v2, s4, 4, v2
+// GFX1250: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04]
+
+v_ashr_pk_i8_i32 v2, v4, v7, 12345
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
+// GFX1250: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04]
+
+v_ashr_pk_u8_i32 v2, s4, v7, v8
+// GFX1250: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04]
+
+v_ashr_pk_u8_i32 v2, v4, 0, 1
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02]
+
+v_ashr_pk_u8_i32 v2, v4, 3, s2
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00]
+
+v_ashr_pk_u8_i32 v2, s4, 4, v2
+// GFX1250: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04]
+
+v_ashr_pk_u8_i32 v2, v4, v7, 12345
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
+// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
index daab6c647ecc2..5b8cbec47957c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -366,3 +366,39 @@ v_cvt_sr_pk_bf16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
// GFX1250: v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x6e,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_ashr_pk_i8_i32 v2, s4, v7, v8
+// GFX1250: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04]
+
+v_ashr_pk_i8_i32 v2, v4, 0, 1
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02]
+
+v_ashr_pk_i8_i32 v2, v4, 3, s2
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00]
+
+v_ashr_pk_i8_i32 v2, s4, 4, v2
+// GFX1250: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04]
+
+v_ashr_pk_i8_i32 v2, v4, v7, 12345
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
+// GFX1250: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04]
+
+v_ashr_pk_u8_i32 v2, s4, v7, v8
+// GFX1250: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04]
+
+v_ashr_pk_u8_i32 v2, v4, 0, 1
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02]
+
+v_ashr_pk_u8_i32 v2, v4, 3, s2
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00]
+
+v_ashr_pk_u8_i32 v2, s4, 4, v2
+// GFX1250: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04]
+
+v_ashr_pk_u8_i32 v2, v4, v7, 12345
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
+// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
index 4868c49d6aa23..4f5be384380e3 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
@@ -297,3 +297,35 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 ban
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x6e,0xd7,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s
index 225f4b0e80dff..dc45c6a80e9f1 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s
@@ -297,3 +297,35 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 ban
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x6e,0xd7,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s
index 277f2d6fd3a8f..8c5b84231076e 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s
@@ -209,3 +209,19 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x6e,0xd7,0xe9,0xfe,0xf7,0x7b,0xff,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x90,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x90,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
index 7e29d04f06dad..73c79721f3b0e 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
@@ -86,3 +86,13 @@ v_mad_nc_i64_i32 v[4:5], v2, v5, v[6:7] quad_perm:[3,2,1,0]
// GFX1251-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share
// GFX125X-ERR-NEXT:{{^}}v_mad_nc_i64_i32 v[4:5], v2, v5, v[6:7] quad_perm:[3,2,1,0]
// GFX125X-ERR-NEXT:{{^}} ^
+
+v_ashr_pk_i8_i32 v1, v2, v3, v4 clamp
+// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX125X-ERR-NEXT:{{^}}v_ashr_pk_i8_i32 v1, v2, v3, v4 clamp
+// GFX125X-ERR-NEXT:{{^}} ^
+
+v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp
+// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX125X-ERR-NEXT:{{^}}v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp
+// GFX125X-ERR-NEXT:{{^}} ^
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
index 345fd5a6a33f2..c62f888524e81 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
@@ -386,6 +386,42 @@
0x05,0x01,0x6e,0xd7,0x7e,0x82,0xad,0x01
# GFX1250: v_cvt_sr_pk_bf16_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x6e,0xd7,0x7e,0x82,0xad,0x01]
+0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04
+# GFX1250: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04]
+
+0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04
+# GFX1250: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04]
+
+0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02
+# GFX1250: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02]
+
+0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00
+# GFX1250: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00]
+
+0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
+# GFX1250: v_ashr_pk_i8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04
+# GFX1250: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04]
+
+0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04
+# GFX1250: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04]
+
+0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04
+# GFX1250: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04]
+
+0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02
+# GFX1250: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02]
+
+0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00
+# GFX1250: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00]
+
+0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
+# GFX1250: v_ashr_pk_u8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04
+# GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
+
## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
# GFX1250-FAKE16: {{.*}}
# GFX1250-REAL16: {{.*}}
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt
index d9c8934e6b1cb..e57fa923f2ffe 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AM...
[truncated]
|
@llvm/pr-subscribers-mc Author: Stanislav Mekhanoshin (rampitec) ChangesPatch is 23.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151389.diff 11 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 51f2c35a751ef..e571b6deee2b1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -2013,6 +2013,8 @@ let AssemblerPredicate = isGFX11Plus in {
// These instructions differ from GFX12 variant by supporting DPP:
defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>;
+defm V_ASHR_PK_I8_I32 : VOP3Only_Realtriple_gfx1250<0x290>;
+defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>;
defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>;
defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>;
diff --git a/llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll b/llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll
index aea2a8b3f0b78..f2ecfe8fc9a1f 100644
--- a/llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
; GFX950-LABEL: v_ashr_pk_i8_i32:
; GFX950: ; %bb.0:
@@ -13,6 +14,20 @@ define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i3
; GFX950-NEXT: v_ashr_pk_i8_i32 v1, s0, v1, v2
; GFX950-NEXT: global_store_short v0, v1, s[6:7]
; GFX950-NEXT: s_endpgm
+;
+; GFX1250-LABEL: v_ashr_pk_i8_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x2c
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_and_b32 s2, s2, 31
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_ashr_pk_i8_i32 v0, s0, s1, v0
+; GFX1250-NEXT: global_store_b16 v1, v0, s[4:5]
+; GFX1250-NEXT: s_endpgm
%insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0
%build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1
%src2.clamp = and i32 %src2, 31
@@ -40,6 +55,20 @@ define amdgpu_kernel void @v_ashr_pk_u8_i32(ptr addrspace(1) %out, i32 %src0, i3
; GFX950-NEXT: v_ashr_pk_u8_i32 v1, s0, v1, v2
; GFX950-NEXT: global_store_short v0, v1, s[6:7]
; GFX950-NEXT: s_endpgm
+;
+; GFX1250-LABEL: v_ashr_pk_u8_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x2c
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_and_b32 s2, s2, 31
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_ashr_pk_u8_i32 v0, s0, s1, v0
+; GFX1250-NEXT: global_store_b16 v1, v0, s[4:5]
+; GFX1250-NEXT: s_endpgm
%insert.0 = insertelement <2 x i32> poison, i32 %src0, i64 0
%build_vector = insertelement <2 x i32> %insert.0, i32 %src1, i64 1
%src2.clamp = and i32 %src2, 31
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
index 67a916d80529e..f3bf07e01250a 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -366,3 +366,39 @@ v_cvt_sr_pk_bf16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
// GFX1250: v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x6e,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_ashr_pk_i8_i32 v2, s4, v7, v8
+// GFX1250: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04]
+
+v_ashr_pk_i8_i32 v2, v4, 0, 1
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02]
+
+v_ashr_pk_i8_i32 v2, v4, 3, s2
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00]
+
+v_ashr_pk_i8_i32 v2, s4, 4, v2
+// GFX1250: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04]
+
+v_ashr_pk_i8_i32 v2, v4, v7, 12345
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
+// GFX1250: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04]
+
+v_ashr_pk_u8_i32 v2, s4, v7, v8
+// GFX1250: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04]
+
+v_ashr_pk_u8_i32 v2, v4, 0, 1
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02]
+
+v_ashr_pk_u8_i32 v2, v4, 3, s2
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00]
+
+v_ashr_pk_u8_i32 v2, s4, 4, v2
+// GFX1250: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04]
+
+v_ashr_pk_u8_i32 v2, v4, v7, 12345
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
+// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
index daab6c647ecc2..5b8cbec47957c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -366,3 +366,39 @@ v_cvt_sr_pk_bf16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
// GFX1250: v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x6e,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_ashr_pk_i8_i32 v2, s4, v7, v8
+// GFX1250: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04]
+
+v_ashr_pk_i8_i32 v2, v4, 0, 1
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02]
+
+v_ashr_pk_i8_i32 v2, v4, 3, s2
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00]
+
+v_ashr_pk_i8_i32 v2, s4, 4, v2
+// GFX1250: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04]
+
+v_ashr_pk_i8_i32 v2, v4, v7, 12345
+// GFX1250: v_ashr_pk_i8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
+// GFX1250: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04]
+
+v_ashr_pk_u8_i32 v2, s4, v7, v8
+// GFX1250: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04]
+
+v_ashr_pk_u8_i32 v2, v4, 0, 1
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02]
+
+v_ashr_pk_u8_i32 v2, v4, 3, s2
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00]
+
+v_ashr_pk_u8_i32 v2, s4, 4, v2
+// GFX1250: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04]
+
+v_ashr_pk_u8_i32 v2, v4, v7, 12345
+// GFX1250: v_ashr_pk_u8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1]
+// GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
index 4868c49d6aa23..4f5be384380e3 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
@@ -297,3 +297,35 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 ban
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x6e,0xd7,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s
index 225f4b0e80dff..dc45c6a80e9f1 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s
@@ -297,3 +297,35 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 ban
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x6e,0xd7,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s
index 277f2d6fd3a8f..8c5b84231076e 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s
@@ -209,3 +209,19 @@ v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x6e,0xd7,0xe9,0xfe,0xf7,0x7b,0xff,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x90,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_i8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_ashr_pk_i8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x90,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_ashr_pk_u8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
index 7e29d04f06dad..73c79721f3b0e 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
@@ -86,3 +86,13 @@ v_mad_nc_i64_i32 v[4:5], v2, v5, v[6:7] quad_perm:[3,2,1,0]
// GFX1251-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share
// GFX125X-ERR-NEXT:{{^}}v_mad_nc_i64_i32 v[4:5], v2, v5, v[6:7] quad_perm:[3,2,1,0]
// GFX125X-ERR-NEXT:{{^}} ^
+
+v_ashr_pk_i8_i32 v1, v2, v3, v4 clamp
+// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX125X-ERR-NEXT:{{^}}v_ashr_pk_i8_i32 v1, v2, v3, v4 clamp
+// GFX125X-ERR-NEXT:{{^}} ^
+
+v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp
+// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX125X-ERR-NEXT:{{^}}v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp
+// GFX125X-ERR-NEXT:{{^}} ^
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
index 345fd5a6a33f2..c62f888524e81 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
@@ -386,6 +386,42 @@
0x05,0x01,0x6e,0xd7,0x7e,0x82,0xad,0x01
# GFX1250: v_cvt_sr_pk_bf16_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x6e,0xd7,0x7e,0x82,0xad,0x01]
+0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04
+# GFX1250: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x08,0x09,0x04]
+
+0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04
+# GFX1250: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0e,0x22,0x04]
+
+0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02
+# GFX1250: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x01,0x05,0x02]
+
+0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00
+# GFX1250: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x07,0x09,0x00]
+
+0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
+# GFX1250: v_ashr_pk_i8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x90,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04
+# GFX1250: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x90,0xd6,0x02,0x07,0x12,0x04]
+
+0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04
+# GFX1250: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x08,0x09,0x04]
+
+0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04
+# GFX1250: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0e,0x22,0x04]
+
+0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02
+# GFX1250: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x01,0x05,0x02]
+
+0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00
+# GFX1250: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x07,0x09,0x00]
+
+0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
+# GFX1250: v_ashr_pk_u8_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x91,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04
+# GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04]
+
## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
# GFX1250-FAKE16: {{.*}}
# GFX1250-REAL16: {{.*}}
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt
index d9c8934e6b1cb..e57fa923f2ffe 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AM...
[truncated]
|
18a03e0
to
f9d1665
Compare
b4efd90
to
b0f03f1
Compare
…8_i32_and_v_ashr_pk_u8_i32_on_gfx1250
…8_i32_and_v_ashr_pk_u8_i32_on_gfx1250
No description provided.