Skip to content

Commit 62187a6

Browse files
authored
[AMDGPU] Add gfx1250 v_cvt_sr_pk_bf16_f32 instruction (#151385)
1 parent f5d49c7 commit 62187a6

18 files changed

+482
-1
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,7 @@ TARGET_BUILTIN(__builtin_amdgcn_exp2_bf16, "yy", "nc", "bf16-trans-insts")
697697
TARGET_BUILTIN(__builtin_amdgcn_sin_bf16, "yy", "nc", "bf16-trans-insts")
698698
TARGET_BUILTIN(__builtin_amdgcn_cos_bf16, "yy", "nc", "bf16-trans-insts")
699699

700+
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_pk_bf16_f32, "V2yffi", "nc", "bf16-cvt-insts")
700701
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
701702
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
702703
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@
108108
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
109109
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
110110
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
111-
// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32
111+
// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
112112

113113
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
114114

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
typedef unsigned int uint;
88
typedef unsigned short int ushort;
99
typedef unsigned int __attribute__((ext_vector_type(2))) uint2;
10+
typedef __bf16 __attribute__((ext_vector_type(2))) bfloat2;
1011
typedef half __attribute__((ext_vector_type(2))) half2;
1112

1213
// CHECK-LABEL: @test_setprio_inc_wg(
@@ -254,6 +255,33 @@ void test_cos_bf16(global __bf16* out, __bf16 a)
254255
*out = __builtin_amdgcn_cos_bf16(a);
255256
}
256257

258+
// CHECK-LABEL: @test_cvt_sr_pk_bf16_f32(
259+
// CHECK-NEXT: entry:
260+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
261+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
262+
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4, addrspace(5)
263+
// CHECK-NEXT: [[SR_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
264+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
265+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
266+
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
267+
// CHECK-NEXT: [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr
268+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
269+
// CHECK-NEXT: store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
270+
// CHECK-NEXT: store float [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
271+
// CHECK-NEXT: store i32 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 4
272+
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
273+
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR_ASCAST]], align 4
274+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
275+
// CHECK-NEXT: [[TMP3:%.*]] = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float [[TMP0]], float [[TMP1]], i32 [[TMP2]])
276+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
277+
// CHECK-NEXT: store <2 x bfloat> [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
278+
// CHECK-NEXT: ret void
279+
//
280+
void test_cvt_sr_pk_bf16_f32(global bfloat2* out, float a, float b, uint sr)
281+
{
282+
*out = __builtin_amdgcn_cvt_sr_pk_bf16_f32(a, b, sr);
283+
}
284+
257285
// CHECK-LABEL: @test_cvt_f16_fp8(
258286
// CHECK-NEXT: entry:
259287
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,10 @@ def int_amdgcn_tanh : DefaultAttrsIntrinsic<
593593
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
594594
>;
595595

596+
def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic<
597+
[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
598+
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">;
599+
596600
def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
597601
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
598602
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4574,6 +4574,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
45744574
case Intrinsic::amdgcn_cvt_pknorm_u16:
45754575
case Intrinsic::amdgcn_cvt_pk_i16:
45764576
case Intrinsic::amdgcn_cvt_pk_u16:
4577+
case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32:
45774578
case Intrinsic::amdgcn_cvt_pk_f16_fp8:
45784579
case Intrinsic::amdgcn_cvt_pk_f16_bf8:
45794580
case Intrinsic::amdgcn_sat_pk4_i4_i8:

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2926,6 +2926,7 @@ def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>;
29262926
def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>;
29272927
def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>;
29282928
def VOP_V32BF16_V6I32_F32 : VOPProfile <[v32bf16, v6i32, f32, untyped]>;
2929+
def VOP_V2BF16_F32_F32_I32 : VOPProfile <[v2bf16, f32, f32, i32]>;
29292930
def VOP_V6I32_V32F16_F32 : VOPProfile<[v6i32, v32f16, f32, untyped]>;
29302931
def VOP_V6I32_V32BF16_F32 : VOPProfile<[v6i32, v32bf16, f32, untyped]>;
29312932
def VOP_V6I32_V16F32_V16F32_F32 : VOPProfile<[v6i32, v16f32, v16f32, f32]>;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1610,6 +1610,7 @@ def bf16_fpround : PatFrag <(ops node:$src0), (fpround $src0), [{ return true;
16101610
let SubtargetPredicate = HasBF16ConversionInsts in {
16111611
let ReadsModeReg = 0 in {
16121612
defm V_CVT_PK_BF16_F32 : VOP3Inst<"v_cvt_pk_bf16_f32", VOP3_Profile<VOP_V2BF16_F32_F32>>;
1613+
defm V_CVT_SR_PK_BF16_F32 : VOP3Inst<"v_cvt_sr_pk_bf16_f32", VOP3_Profile<VOP_V2BF16_F32_F32_I32>, int_amdgcn_cvt_sr_pk_bf16_f32>;
16131614
}
16141615
def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)),
16151616
(V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>;
@@ -2013,6 +2014,7 @@ let AssemblerPredicate = isGFX11Plus in {
20132014
// These instructions differ from the GFX12 variants by supporting DPP:
20142015
defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>;
20152016
defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>;
2017+
defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>;
20162018

20172019
//===----------------------------------------------------------------------===//
20182020
// GFX10.

llvm/lib/TargetParser/TargetParser.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
446446
Features["tanh-insts"] = true;
447447
Features["transpose-load-f4f6-insts"] = true;
448448
Features["bf16-trans-insts"] = true;
449+
Features["bf16-cvt-insts"] = true;
449450
Features["fp8-conversion-insts"] = true;
450451
Features["fp8e5m3-insts"] = true;
451452
Features["permlane16-swap"] = true;
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
3+
; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
4+
5+
; FIXME: GlobalISel does not work with bf16, so the -global-isel=1 RUN line above is disabled (spelled "xUN").
6+
7+
declare <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float, float, i32) #0
8+
9+
define amdgpu_ps float @cvt_sr_pk_bf16_f32_vvv(float %src0, float %src1, i32 %src2) #1 {
10+
; GCN-LABEL: cvt_sr_pk_bf16_f32_vvv:
11+
; GCN: ; %bb.0:
12+
; GCN-NEXT: v_cvt_sr_pk_bf16_f32 v0, v0, v1, v2
13+
; GCN-NEXT: ; return to shader part epilog
14+
%cvt = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float %src0, float %src1, i32 %src2) #0
15+
%ret = bitcast <2 x bfloat> %cvt to float
16+
ret float %ret
17+
}
18+
19+
define amdgpu_ps float @cvt_sr_pk_bf16_f32_sss(float inreg %src0, float inreg %src1, i32 inreg %src2) #1 {
20+
; GCN-LABEL: cvt_sr_pk_bf16_f32_sss:
21+
; GCN: ; %bb.0:
22+
; GCN-NEXT: v_mov_b32_e32 v0, s2
23+
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
24+
; GCN-NEXT: v_cvt_sr_pk_bf16_f32 v0, s0, s1, v0
25+
; GCN-NEXT: ; return to shader part epilog
26+
%cvt = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float %src0, float %src1, i32 %src2) #0
27+
%ret = bitcast <2 x bfloat> %cvt to float
28+
ret float %ret
29+
}
30+
31+
define amdgpu_ps float @cvt_sr_pk_bf16_f32_vvi(float %src0, float %src1) #1 {
32+
; GCN-LABEL: cvt_sr_pk_bf16_f32_vvi:
33+
; GCN: ; %bb.0:
34+
; GCN-NEXT: v_cvt_sr_pk_bf16_f32 v0, v0, v1, 0x10002
35+
; GCN-NEXT: ; return to shader part epilog
36+
%cvt = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float %src0, float %src1, i32 65538) #0
37+
%ret = bitcast <2 x bfloat> %cvt to float
38+
ret float %ret
39+
}
40+
41+
define amdgpu_ps float @cvt_sr_pk_bf16_f32_vvi_mods(float %src0, float %src1) #1 {
42+
; GCN-LABEL: cvt_sr_pk_bf16_f32_vvi_mods:
43+
; GCN: ; %bb.0:
44+
; GCN-NEXT: v_cvt_sr_pk_bf16_f32 v0, -v0, |v1|, 1
45+
; GCN-NEXT: ; return to shader part epilog
46+
%s0 = fneg float %src0
47+
%s1 = call float @llvm.fabs.f32(float %src1) #0
48+
%cvt = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float %s0, float %s1, i32 1) #0
49+
%ret = bitcast <2 x bfloat> %cvt to float
50+
ret float %ret
51+
}
52+
53+
define amdgpu_ps float @cvt_sr_pk_bf16_f32_ssi(float inreg %src0, float inreg %src1) #1 {
54+
; GCN-LABEL: cvt_sr_pk_bf16_f32_ssi:
55+
; GCN: ; %bb.0:
56+
; GCN-NEXT: v_cvt_sr_pk_bf16_f32 v0, s0, s1, 1
57+
; GCN-NEXT: ; return to shader part epilog
58+
%cvt = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float %src0, float %src1, i32 1) #0
59+
%ret = bitcast <2 x bfloat> %cvt to float
60+
ret float %ret
61+
}
62+
63+
declare float @llvm.fabs.f32(float) #0
64+
65+
attributes #0 = { nounwind readnone }
66+
attributes #1 = { nounwind }

llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,3 +321,48 @@ v_cvt_pk_bf16_f32 v5, src_scc, vcc_lo mul:4
321321

322322
v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2
323323
// GFX1250: v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf]
324+
325+
v_cvt_sr_pk_bf16_f32 v5, v1, v2, s3
326+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x6e,0xd7,0x01,0x05,0x0e,0x00]
327+
328+
v_cvt_sr_pk_bf16_f32 v5, v255, s2, s105
329+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x6e,0xd7,0xff,0x05,0xa4,0x01]
330+
331+
v_cvt_sr_pk_bf16_f32 v5, s1, v255, exec_hi
332+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x6e,0xd7,0x01,0xfe,0xff,0x01]
333+
334+
v_cvt_sr_pk_bf16_f32 v5, s105, s105, exec_lo
335+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x6e,0xd7,0x69,0xd2,0xf8,0x01]
336+
337+
v_cvt_sr_pk_bf16_f32 v5, vcc_lo, ttmp15, v3
338+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x6e,0xd7,0x6a,0xf6,0x0c,0x04]
339+
340+
v_cvt_sr_pk_bf16_f32 v5, vcc_hi, 0xaf123456, v255
341+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x6e,0xd7,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
342+
343+
v_cvt_sr_pk_bf16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15
344+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15 ; encoding: [0x05,0x03,0x6e,0xd7,0x7b,0xfa,0xed,0x61]
345+
346+
v_cvt_sr_pk_bf16_f32 v5, m0, 0.5, m0
347+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x6e,0xd7,0x7d,0xe0,0xf5,0x01]
348+
349+
v_cvt_sr_pk_bf16_f32 v5, |exec_lo|, -1, vcc_hi
350+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x6e,0xd7,0x7e,0x82,0xad,0x01]
351+
352+
v_cvt_sr_pk_bf16_f32 v5, -|exec_hi|, null, vcc_lo
353+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, -|exec_hi|, null, vcc_lo ; encoding: [0x05,0x01,0x6e,0xd7,0x7f,0xf8,0xa8,0x21]
354+
355+
v_cvt_sr_pk_bf16_f32 v5, null, exec_lo, 0xaf123456
356+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x6e,0xd7,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
357+
358+
v_cvt_sr_pk_bf16_f32 v5, -1, -|exec_hi|, src_scc
359+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, -1, -|exec_hi|, src_scc ; encoding: [0x05,0x02,0x6e,0xd7,0xc1,0xfe,0xf4,0x43]
360+
361+
v_cvt_sr_pk_bf16_f32 v5, 0.5, -m0, 0.5 mul:2
362+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x6e,0xd7,0xf0,0xfa,0xc0,0x4b]
363+
364+
v_cvt_sr_pk_bf16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
365+
// GFX1250: v_cvt_sr_pk_bf16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x6e,0xd7,0xfd,0xd4,0x04,0x33]
366+
367+
v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
368+
// GFX1250: v_cvt_sr_pk_bf16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x6e,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]

0 commit comments

Comments
 (0)