Skip to content

Commit b137020

Browse files
committed
[AMDGPU] Add gfx1250 V_ADD_{MIN|MAX}_{U|I}32 instructions
1 parent 3b5aff5 commit b137020

13 files changed

+454
-33
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2576,6 +2576,10 @@ def HasFmaakFmamkF64Insts :
25762576
Predicate<"Subtarget->hasFmaakFmamkF64Insts()">,
25772577
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
25782578

2579+
def HasAddMinMaxInsts :
2580+
Predicate<"Subtarget->hasAddMinMaxInsts()">,
2581+
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
2582+
25792583
def HasPkAddMinMaxInsts :
25802584
Predicate<"Subtarget->hasPkAddMinMaxInsts()">,
25812585
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1535,6 +1535,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
15351535
// \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
15361536
bool hasIntMinMax64() const { return GFX1250Insts; }
15371537

1538+
// \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
1539+
bool hasAddMinMaxInsts() const { return GFX1250Insts; }
1540+
15381541
// \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
15391542
bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }
15401543

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,13 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
746746
defm V_MAXIMUM3_F16 : VOP3Inst_t16 <"v_maximum3_f16", VOP_F16_F16_F16_F16, AMDGPUfmaximum3>;
747747
} // End SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0
748748

749+
// Fused 32-bit integer add followed by a signed/unsigned min or max:
// result = {min|max}(src0 + src1, src2). Defined as VOP3 instructions and
// gated on HasAddMinMaxInsts.
// NOTE(review): isCommutable presumably refers to swapping the two addends
// (src0/src1), since src2 is not interchangeable with them - confirm.
let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
750+
defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>;
751+
defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>;
752+
defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>;
753+
defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>;
754+
} // End SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1
755+
749756
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
750757
defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
751758

@@ -885,6 +892,13 @@ def : GCNPat<
885892
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
886893
>;
887894

895+
// Select an i32 add whose result feeds smax/umax/smin/umin into the matching
// fused V_ADD_{MIN|MAX}_{I|U}32 VOP3 instruction when the subtarget has them.
// NOTE(review): the exact operand and use-count constraints come from
// ThreeOp_i32_Pats, which is defined outside this view - confirm there.
let SubtargetPredicate = HasAddMinMaxInsts in {
896+
def : ThreeOp_i32_Pats<add, smax, V_ADD_MAX_I32_e64>;
897+
def : ThreeOp_i32_Pats<add, umax, V_ADD_MAX_U32_e64>;
898+
def : ThreeOp_i32_Pats<add, smin, V_ADD_MIN_I32_e64>;
899+
def : ThreeOp_i32_Pats<add, umin, V_ADD_MIN_U32_e64>;
900+
} // End SubtargetPredicate = HasAddMinMaxInsts
901+
888902
def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
889903
def : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>;
890904

@@ -1821,6 +1835,10 @@ defm V_MIN_U64 : VOP3Only_Realtriple_gfx1250<0x318>;
18211835
defm V_MAX_U64 : VOP3Only_Realtriple_gfx1250<0x319>;
18221836
defm V_MIN_I64 : VOP3Only_Realtriple_gfx1250<0x31a>;
18231837
defm V_MAX_I64 : VOP3Only_Realtriple_gfx1250<0x31b>;
1838+
defm V_ADD_MAX_I32 : VOP3Only_Realtriple_gfx1250<0x25e>;
1839+
defm V_ADD_MAX_U32 : VOP3Only_Realtriple_gfx1250<0x25f>;
1840+
defm V_ADD_MIN_I32 : VOP3Only_Realtriple_gfx1250<0x260>;
1841+
defm V_ADD_MIN_U32 : VOP3Only_Realtriple_gfx1250<0x261>;
18241842

18251843
defm V_CVT_PK_FP8_F32 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x369, "v_cvt_pk_fp8_f32">;
18261844
defm V_CVT_PK_BF8_F32 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x36a, "v_cvt_pk_bf8_f32">;

llvm/test/CodeGen/AMDGPU/add-max.ll

Lines changed: 29 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@
55
define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
66
; GCN-LABEL: add_max_u32_vvv:
77
; GCN: ; %bb.0:
8-
; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
9-
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
10-
; GCN-NEXT: v_max_u32_e32 v0, v0, v2
8+
; GCN-NEXT: v_add_max_u32_e64 v0, v0, v1, v2
119
; GCN-NEXT: ; return to shader part epilog
1210
%add = add i32 %a, %b
1311
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -18,9 +16,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
1816
define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
1917
; GCN-LABEL: add_max_u32_svv:
2018
; GCN: ; %bb.0:
21-
; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v0
22-
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
23-
; GCN-NEXT: v_max_u32_e32 v0, v0, v1
19+
; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, v1
2420
; GCN-NEXT: ; return to shader part epilog
2521
%add = add i32 %a, %b
2622
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -29,12 +25,17 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
2925
}
3026

3127
define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
32-
; GCN-LABEL: add_max_u32_ssv:
33-
; GCN: ; %bb.0:
34-
; GCN-NEXT: s_add_co_i32 s0, s0, s1
35-
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
36-
; GCN-NEXT: v_max_u32_e32 v0, s0, v0
37-
; GCN-NEXT: ; return to shader part epilog
28+
; SDAG-LABEL: add_max_u32_ssv:
29+
; SDAG: ; %bb.0:
30+
; SDAG-NEXT: v_add_max_u32_e64 v0, s0, s1, v0
31+
; SDAG-NEXT: ; return to shader part epilog
32+
;
33+
; GISEL-LABEL: add_max_u32_ssv:
34+
; GISEL: ; %bb.0:
35+
; GISEL-NEXT: s_add_co_i32 s0, s0, s1
36+
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
37+
; GISEL-NEXT: v_max_u32_e32 v0, s0, v0
38+
; GISEL-NEXT: ; return to shader part epilog
3839
%add = add i32 %a, %b
3940
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
4041
%ret = bitcast i32 %max to float
@@ -58,9 +59,7 @@ define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c
5859
define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
5960
; GCN-LABEL: add_max_u32_vsi:
6061
; GCN: ; %bb.0:
61-
; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v0
62-
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
63-
; GCN-NEXT: v_max_u32_e32 v0, 4, v0
62+
; GCN-NEXT: v_add_max_u32_e64 v0, v0, s0, 4
6463
; GCN-NEXT: ; return to shader part epilog
6564
%add = add i32 %a, %b
6665
%max = call i32 @llvm.umax.i32(i32 %add, i32 4)
@@ -71,9 +70,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
7170
define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
7271
; GCN-LABEL: add_max_u32_svl:
7372
; GCN: ; %bb.0:
74-
; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v0
75-
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
76-
; GCN-NEXT: v_max_u32_e32 v0, 0x64, v0
73+
; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, 0x64
7774
; GCN-NEXT: ; return to shader part epilog
7875
%add = add i32 %a, %b
7976
%max = call i32 @llvm.umax.i32(i32 %add, i32 100)
@@ -82,12 +79,17 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
8279
}
8380

8481
define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
85-
; GCN-LABEL: add_max_u32_slv:
86-
; GCN: ; %bb.0:
87-
; GCN-NEXT: s_addk_co_i32 s0, 0x64
88-
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
89-
; GCN-NEXT: v_max_u32_e32 v0, s0, v0
90-
; GCN-NEXT: ; return to shader part epilog
82+
; SDAG-LABEL: add_max_u32_slv:
83+
; SDAG: ; %bb.0:
84+
; SDAG-NEXT: v_add_max_u32_e64 v0, 0x64, s0, v0
85+
; SDAG-NEXT: ; return to shader part epilog
86+
;
87+
; GISEL-LABEL: add_max_u32_slv:
88+
; GISEL: ; %bb.0:
89+
; GISEL-NEXT: s_addk_co_i32 s0, 0x64
90+
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
91+
; GISEL-NEXT: v_max_u32_e32 v0, s0, v0
92+
; GISEL-NEXT: ; return to shader part epilog
9193
%add = add i32 %a, 100
9294
%max = call i32 @llvm.umax.i32(i32 %add, i32 %b)
9395
%ret = bitcast i32 %max to float
@@ -97,9 +99,7 @@ define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
9799
define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
98100
; GCN-LABEL: add_max_i32_vvv:
99101
; GCN: ; %bb.0:
100-
; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
101-
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
102-
; GCN-NEXT: v_max_i32_e32 v0, v0, v2
102+
; GCN-NEXT: v_add_max_i32_e64 v0, v0, v1, v2
103103
; GCN-NEXT: ; return to shader part epilog
104104
%add = add i32 %a, %b
105105
%max = call i32 @llvm.smax.i32(i32 %add, i32 %c)
@@ -110,9 +110,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
110110
define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
111111
; GCN-LABEL: add_min_u32_vvv:
112112
; GCN: ; %bb.0:
113-
; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
114-
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
115-
; GCN-NEXT: v_min_u32_e32 v0, v0, v2
113+
; GCN-NEXT: v_add_min_u32_e64 v0, v0, v1, v2
116114
; GCN-NEXT: ; return to shader part epilog
117115
%add = add i32 %a, %b
118116
%max = call i32 @llvm.umin.i32(i32 %add, i32 %c)
@@ -123,9 +121,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
123121
define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) {
124122
; GCN-LABEL: add_min_i32_vvv:
125123
; GCN: ; %bb.0:
126-
; GCN-NEXT: v_add_nc_u32_e32 v0, v0, v1
127-
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
128-
; GCN-NEXT: v_min_i32_e32 v0, v0, v2
124+
; GCN-NEXT: v_add_min_i32_e64 v0, v0, v1, v2
129125
; GCN-NEXT: ; return to shader part epilog
130126
%add = add i32 %a, %b
131127
%max = call i32 @llvm.smin.i32(i32 %add, i32 %c)

llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,66 @@ v_mad_nc_i64_i32 v[2:3], v4, v7, 12345
217217
v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
218218
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
219219

220+
v_add_min_i32 v2, s4, v7, v8
221+
// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
222+
223+
v_add_min_i32 v2, v4, 0, 1
224+
// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
225+
226+
v_add_min_i32 v2, v4, 3, s2
227+
// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
228+
229+
v_add_min_i32 v2, s4, 4, v2
230+
// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
231+
232+
v_add_min_i32 v2, v4, v7, 12345
233+
// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
234+
235+
v_add_max_i32 v2, s4, v7, v8
236+
// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
237+
238+
v_add_max_i32 v2, v4, 0, 1
239+
// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
240+
241+
v_add_max_i32 v2, v4, 3, s2
242+
// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
243+
244+
v_add_max_i32 v2, s4, 4, v2
245+
// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
246+
247+
v_add_max_i32 v2, v4, v7, 12345
248+
// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
249+
250+
v_add_min_u32 v2, s4, v7, v8
251+
// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
252+
253+
v_add_min_u32 v2, v4, 0, 1
254+
// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
255+
256+
v_add_min_u32 v2, v4, 3, s2
257+
// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
258+
259+
v_add_min_u32 v2, s4, 4, v2
260+
// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
261+
262+
v_add_min_u32 v2, v4, v7, 12345
263+
// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
264+
265+
v_add_max_u32 v2, s4, v7, v8
266+
// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
267+
268+
v_add_max_u32 v2, v4, 0, 1
269+
// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
270+
271+
v_add_max_u32 v2, v4, 3, s2
272+
// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
273+
274+
v_add_max_u32 v2, s4, 4, v2
275+
// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
276+
277+
v_add_max_u32 v2, v4, v7, 12345
278+
// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
279+
220280
v_cvt_pk_bf16_f32 v5, v1, v2
221281
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
222282

llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,66 @@ v_mad_nc_i64_i32 v[2:3], v4, v7, 12345
217217
v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
218218
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
219219

220+
v_add_min_i32 v2, s4, v7, v8
221+
// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
222+
223+
v_add_min_i32 v2, v4, 0, 1
224+
// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
225+
226+
v_add_min_i32 v2, v4, 3, s2
227+
// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
228+
229+
v_add_min_i32 v2, s4, 4, v2
230+
// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
231+
232+
v_add_min_i32 v2, v4, v7, 12345
233+
// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
234+
235+
v_add_max_i32 v2, s4, v7, v8
236+
// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
237+
238+
v_add_max_i32 v2, v4, 0, 1
239+
// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
240+
241+
v_add_max_i32 v2, v4, 3, s2
242+
// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
243+
244+
v_add_max_i32 v2, s4, 4, v2
245+
// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
246+
247+
v_add_max_i32 v2, v4, v7, 12345
248+
// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
249+
250+
v_add_min_u32 v2, s4, v7, v8
251+
// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
252+
253+
v_add_min_u32 v2, v4, 0, 1
254+
// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
255+
256+
v_add_min_u32 v2, v4, 3, s2
257+
// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
258+
259+
v_add_min_u32 v2, s4, 4, v2
260+
// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
261+
262+
v_add_min_u32 v2, v4, v7, 12345
263+
// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
264+
265+
v_add_max_u32 v2, s4, v7, v8
266+
// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
267+
268+
v_add_max_u32 v2, v4, 0, 1
269+
// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
270+
271+
v_add_max_u32 v2, v4, 3, s2
272+
// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
273+
274+
v_add_max_u32 v2, s4, 4, v2
275+
// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
276+
277+
v_add_max_u32 v2, v4, v7, 12345
278+
// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
279+
220280
v_cvt_pk_bf16_f32 v5, v1, v2
221281
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
222282

llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,54 @@ v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:102 op_sel:[1,1,1,1] quad_perm:[0,1,2
146146
// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x66 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0x0e,0xcc,0x01,0xe4,0x00,0xff]
147147
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
148148

149+
v_add_min_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
150+
// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
151+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
152+
153+
v_add_min_i32 v2, v4, v7, v8 row_share:3 fi:1
154+
// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
155+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
156+
157+
v_add_min_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
158+
// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
159+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
160+
161+
v_add_max_i32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
162+
// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
163+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
164+
165+
v_add_max_i32 v2, v4, v7, v8 row_share:3 fi:1
166+
// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
167+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
168+
169+
v_add_max_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
170+
// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
171+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
172+
173+
v_add_min_u32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
174+
// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
175+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
176+
177+
v_add_min_u32 v2, v4, v7, v8 row_share:3 fi:1
178+
// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
179+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
180+
181+
v_add_min_u32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
182+
// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
183+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
184+
185+
v_add_max_u32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
186+
// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
187+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
188+
189+
v_add_max_u32 v2, v4, v7, v8 row_share:3 fi:1
190+
// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
191+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
192+
193+
v_add_max_u32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
194+
// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
195+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
196+
149197
v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
150198
// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
151199
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

0 commit comments

Comments
 (0)