diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index ec00fadf3039a..172ac467f7cad 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -702,6 +702,10 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts") diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 2595442ba7f9e..1c67fc3879bff 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -398,6 +398,144 @@ void test_cvt_pk_f16_bf8(global half2* out, short a) out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a); } +// CHECK-LABEL: @test_cvt_pk_bf8_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +void test_cvt_pk_bf8_f16(global short* out, half2 a) +{ + *out = __builtin_amdgcn_cvt_pk_bf8_f16(a); +} + +// CHECK-LABEL: @test_cvt_pk_fp8_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +void test_cvt_pk_fp8_f16(global short* out, half2 a) +{ + *out = __builtin_amdgcn_cvt_pk_fp8_f16(a); +} + +// CHECK-LABEL: @test_cvt_sr_bf8_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[SR_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr +// CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: store i32 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 0) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP5]], i32 [[TMP6]], i32 [[TMP7]], i32 1) +// CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) [[TMP9]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP10]], i32 [[TMP11]], i32 [[TMP12]], i32 2) +// CHECK-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP13]], ptr addrspace(1) [[TMP14]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP15]], i32 [[TMP16]], i32 [[TMP17]], i32 3) +// CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP18]], ptr addrspace(1) [[TMP19]], align 4 +// CHECK-NEXT: ret void +// +void test_cvt_sr_bf8_f16(global int* out, half a, uint sr, int old) +{ + *out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 0); + *out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 1); + *out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 2); + *out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 3); +} + +// CHECK-LABEL: @test_cvt_sr_fp8_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[SR_ADDR:%.*]] = alloca i16, align 2, addrspace(5) +// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr +// CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: store i16 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 2 +// CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP0]], i32 [[CONV]], i32 [[TMP2]], i32 0) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP5]], i32 [[CONV1]], i32 [[TMP7]], i32 1) +// CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) [[TMP9]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP10]], i32 [[CONV2]], i32 [[TMP12]], i32 2) +// CHECK-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP13]], ptr addrspace(1) [[TMP14]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP16:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP15]], i32 [[CONV3]], i32 [[TMP17]], i32 3) +// CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP18]], ptr addrspace(1) [[TMP19]], align 4 +// CHECK-NEXT: ret void +// +void test_cvt_sr_fp8_f16(global int* out, half a, short sr, int old) +{ + *out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 0); + *out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 1); + *out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 2); + *out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 3); +} + // CHECK-LABEL: @test_sat_pk4_i4_i8( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index c29f6c740f1a4..4a50558ca8e86 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -605,6 +605,30 @@ def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic< [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">; +def int_amdgcn_cvt_pk_fp8_f16 + : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty], + [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">; + +def int_amdgcn_cvt_pk_bf8_f16 + : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty], + [IntrNoMem, IntrSpeculatable]>, + ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">; + +// llvm.amdgcn.cvt.sr.fp8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3] +// byte_sel selects byte to write in vdst. +def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic< + [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, ImmArg>] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">; + +// llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3] +// byte_sel selects byte to write in vdst. +def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic< + [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, ImmArg>] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">; + class AMDGPUCvtScaleF32Intrinsic : DefaultAttrsIntrinsic< [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_"#name>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 25f428ac6c8a9..6bca2fec17c6d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4577,6 +4577,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32: case Intrinsic::amdgcn_cvt_pk_f16_fp8: case Intrinsic::amdgcn_cvt_pk_f16_bf8: + case Intrinsic::amdgcn_cvt_pk_fp8_f16: + case Intrinsic::amdgcn_cvt_pk_bf8_f16: + case Intrinsic::amdgcn_cvt_sr_fp8_f16: + case Intrinsic::amdgcn_cvt_sr_bf8_f16: case Intrinsic::amdgcn_sat_pk4_i4_i8: case Intrinsic::amdgcn_sat_pk4_u4_u8: case Intrinsic::amdgcn_fmed3: diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 44e65b3588888..a4ea8cf423f84 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -689,6 +689,8 @@ class AMDGPUOperand : public MCParsedAsmOperand { bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); } + bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); } + bool isVISrcB32() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); } @@ -2036,6 +2038,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: case AMDGPU::OPERAND_KIMM16: return &APFloat::IEEEhalf(); case AMDGPU::OPERAND_REG_IMM_BF16: @@ -2405,6 +2408,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: case AMDGPU::OPERAND_REG_IMM_V2FP32: case AMDGPU::OPERAND_REG_IMM_V2INT32: case AMDGPU::OPERAND_KIMM32: @@ -2456,6 +2460,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo setImmKindConst(); return; } + [[fallthrough]]; + + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: Inst.addOperand(MCOperand::createImm(Lo_32(Val))); setImmKindLiteral(); @@ -3761,6 +3768,9 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16) return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm()); + if (OperandType == AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16) + return false; + llvm_unreachable("invalid operand type"); } default: @@ -9421,7 +9431,19 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 || Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || - Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) { + Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 || + Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 || + Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 || + Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 || + Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 || + Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 || + Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 || + Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 || + Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 || + Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 || + Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 || + Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) { Inst.addOperand(Inst.getOperand(0)); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 11b072e05a6e1..15088ac25863f 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -540,6 +540,8 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType, printImmediateBFloat16(static_cast(Imm), STI, O)) return; break; + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: + break; default: llvm_unreachable("bad operand type"); } @@ -770,6 +772,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo, case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2BF16: case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp index c49ad798c1824..f3580842c6ff0 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp @@ -341,6 +341,9 @@ std::optional AMDGPUMCCodeEmitter::getLitEncoding( return AMDGPU::getInlineEncodingV2BF16(static_cast(Imm)) .value_or(255); + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: + return 255; + case AMDGPU::OPERAND_KIMM32: case AMDGPU::OPERAND_KIMM16: case AMDGPU::OPERAND_KIMM64: diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 40b8bcde005c0..c56414519a6fe 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -208,6 +208,7 @@ enum OperandType : unsigned { OPERAND_REG_IMM_V2BF16, OPERAND_REG_IMM_V2FP16, OPERAND_REG_IMM_V2INT16, + OPERAND_REG_IMM_NOINLINE_V2FP16, OPERAND_REG_IMM_V2INT32, OPERAND_REG_IMM_V2FP32, diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index b77da4d612dd4..e934152d08acb 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -468,6 +468,7 @@ bool SIFoldOperandsImpl::canUseImmWithOpSel(const MachineInstr *MI, case AMDGPU::OPERAND_REG_IMM_V2FP16: case AMDGPU::OPERAND_REG_IMM_V2BF16: case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index c2da937552240..044a681bfed3d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4438,6 +4438,8 @@ bool SIInstrInfo::isInlineConstant(int64_t Imm, uint8_t OperandType) const { case AMDGPU::OPERAND_REG_IMM_V2BF16: case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: return AMDGPU::isInlinableLiteralV2BF16(Imm); + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: + return false; case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: { if (isInt<16>(Imm) || isUInt<16>(Imm)) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 7324b6a347447..efcc88e564e65 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2859,6 +2859,7 @@ def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>; def VOP_BF16_BF16 : VOPProfile<[bf16, bf16, untyped, untyped]>; def VOP1_I16_I32 : VOPProfile<[i16, i32, untyped, untyped]>; +def VOP_I16_V2F16 : VOPProfile<[i16, v2f16, untyped, untyped]>; def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 218841d182ff2..36d1a3b9442ec 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1218,6 +1218,8 @@ def VSrc_f64 : SrcRegOrImm9 { def VSrc_v2b32 : SrcRegOrImm9 ; def VSrc_v2f32 : SrcRegOrImm9 ; +def VSrc_NoInline_v2f16 : SrcRegOrImm9 ; + //===----------------------------------------------------------------------===// // VRegSrc_* Operands with a VGPR //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 83e63ac4e9777..5827f18643ed4 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2659,6 +2659,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_C_FP16: @@ -3023,6 +3024,8 @@ bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) { case AMDGPU::OPERAND_REG_IMM_V2BF16: case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: return isInlinableLiteralV2BF16(Literal); + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: + return false; default: llvm_unreachable("bad packed operand type"); } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index c09a9d694f3d8..74d59f4852ef3 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1636,6 +1636,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) { case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2BF16: case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: return 2; default: diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index cc2d1103e4c55..2d3caec72dea8 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1623,6 +1623,53 @@ let SubtargetPredicate = HasBF16ConversionInsts in { (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>; } +let Src0RC64 = VSrc_NoInline_v2f16 in { +def VOP3_CVT_PK_F8_F16_Profile : VOP3_Profile; +def VOP3_CVT_PK_F8_F16_True16_Profile : VOP3_Profile_True16; +def VOP3_CVT_PK_F8_F16_Fake16_Profile : VOP3_Profile_Fake16; +} + +let ReadsModeReg = 0, IsPacked = 0, SubtargetPredicate = isGFX125xOnly in { + defm V_CVT_PK_FP8_F16_gfx1250 : VOP3Inst_t16_with_profiles<"v_cvt_pk_fp8_f16_gfx1250", + VOP3_CVT_PK_F8_F16_Profile, + VOP3_CVT_PK_F8_F16_True16_Profile, + VOP3_CVT_PK_F8_F16_Fake16_Profile, + int_amdgcn_cvt_pk_fp8_f16>; + defm V_CVT_PK_BF8_F16_gfx1250 : VOP3Inst_t16_with_profiles<"v_cvt_pk_bf8_f16_gfx1250", + VOP3_CVT_PK_F8_F16_Profile, + VOP3_CVT_PK_F8_F16_True16_Profile, + VOP3_CVT_PK_F8_F16_Fake16_Profile, + int_amdgcn_cvt_pk_bf8_f16>; +} + +let HasClamp = 0, HasOpSel = 1 in { +def VOP3_CVT_SR_F8_F16_Profile : VOP3_CVT_SR_F8_ByteSel_Profile; +def VOP3_CVT_SR_F8_F16_True16_Profile : VOP3_Profile_True16; +def VOP3_CVT_SR_F8_F16_Fake16_Profile : VOP3_Profile_Fake16; +} + +let SubtargetPredicate = isGFX1250Plus in { + let ReadsModeReg = 0 in { + // These instructions have non-standard use of op_sel. They are using bits 2 and 3 of opsel + // to select a byte in the vdst. Bits 0 and 1 are unused. + let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in { + defm V_CVT_SR_FP8_F16 : VOP3Inst_t16_with_profiles<"v_cvt_sr_fp8_f16", VOP3_CVT_SR_F8_F16_Profile, + VOP3_CVT_SR_F8_F16_True16_Profile, VOP3_CVT_SR_F8_F16_Fake16_Profile>; + defm V_CVT_SR_BF8_F16 : VOP3Inst_t16_with_profiles<"v_cvt_sr_bf8_f16", VOP3_CVT_SR_F8_F16_Profile, + VOP3_CVT_SR_F8_F16_True16_Profile, VOP3_CVT_SR_F8_F16_Fake16_Profile>; + } + } // End ReadsModeReg = 0 + + let True16Predicate = UseRealTrue16Insts in { + def : Cvt_SR_F8_ByteSel_Pat; + def : Cvt_SR_F8_ByteSel_Pat; + } + let True16Predicate = UseFakeTrue16Insts in { + def : Cvt_SR_F8_ByteSel_Pat; + def : Cvt_SR_F8_ByteSel_Pat; + } +} // End SubtargetPredicate = isGFX1250Plus + class Cvt_Scale_Sr_F32ToBF16F16_Pat : GCNPat< (DstTy (node DstTy:$vdst_in, f32:$src0, i32:$src1, timm:$word_sel)), (inst (DstSelToOpSelXForm $word_sel), $src0, 0, $src1, VGPR_32:$vdst_in) @@ -2019,6 +2066,10 @@ defm V_ASHR_PK_I8_I32 : VOP3Only_Realtriple_gfx1250<0x290>; defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>; defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>; defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>; +defm V_CVT_PK_FP8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x372, "v_cvt_pk_fp8_f16">; +defm V_CVT_PK_BF8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x373, "v_cvt_pk_bf8_f16">; +defm V_CVT_SR_FP8_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x374>; +defm V_CVT_SR_BF8_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x375>; //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index badbba9487d63..a029376c723ff 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -2071,6 +2071,15 @@ multiclass VOP3Only_Real_Base_gfx1250 op> : multiclass VOP3Only_Realtriple_gfx1250 op, bit isSingle = 0> : VOP3_Realtriple; +multiclass VOP3Only_Realtriple_with_name_gfx1250 op, string opName, + string asmName, string pseudo_mnemonic = "", + bit isSingle = 0> : + VOP3_Realtriple_with_name; + +multiclass VOP3Only_Realtriple_t16_gfx1250 op, string asmName = !cast(NAME#"_e64").Mnemonic, + string opName = NAME, string pseudo_mnemonic = "", bit isSingle = 0> : + VOP3Only_Realtriple_with_name_gfx1250; + multiclass VOP3_Realtriple_t16_gfx12 op, string asmName, string opName = NAME, string pseudo_mnemonic = "", bit isSingle = 0> : VOP3_Realtriple_with_name; @@ -2091,6 +2100,13 @@ multiclass VOP3Only_Realtriple_t16_and_fake16_gfx12 op, string asmName, defm _fake16 : VOP3Only_Realtriple_t16_gfx12; } +multiclass VOP3Only_Realtriple_t16_and_fake16_gfx1250 op, + string asmName = !cast(NAME#"_e64").Mnemonic, + string opName = NAME, string pseudo_mnemonic = ""> { + defm _t16 : VOP3Only_Realtriple_t16_gfx1250; + defm _fake16 : VOP3Only_Realtriple_t16_gfx1250; +} + multiclass VOP3be_Real_with_name_gfx12 op, string opName, string asmName, bit isSingle = 0> { defvar ps = !cast(opName#"_e64"); diff --git a/llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll b/llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll new file mode 100644 index 0000000000000..fcbf7efdaa653 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1250 %s + +define i16 @cvt_pk_bf8_f16_v(ptr addrspace(1) %out) { +; GFX1250-LABEL: cvt_pk_bf8_f16_v: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_cvt_pk_bf8_f16 v0, 0x38003800 ; encoding: [0x00,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x38] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] + %cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> ) + ret i16 %cvt +} + +; GFX1250: codeLenInByte = 24 + +define i16 @cvt_pk_fp8_f16_v(ptr addrspace(1) %out) { +; GFX1250-LABEL: cvt_pk_fp8_f16_v: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: v_cvt_pk_fp8_f16 v0, 0x3800 ; encoding: [0x00,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] + %cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> ) + ret i16 %cvt +} + +; GFX1250: codeLenInByte = 24 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll new file mode 100644 index 0000000000000..6ccfad74ef9d4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll @@ -0,0 +1,539 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-REAL16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-FAKE16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-REAL16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-FAKE16 %s + +declare i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half>) +declare i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half>) +declare i32 @llvm.amdgcn.cvt.sr.bf8.f16(half, i32, i32, i32) +declare i32 @llvm.amdgcn.cvt.sr.fp8.f16(half, i32, i32, i32) + +define amdgpu_ps void @test_cvt_pk_bf8_f16_v(<2 x half> %a, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_v: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v0.l, v0 +; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[2:3], v0 +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_v: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_bf8_f16 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_v: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2 +; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v0.l, v0 +; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[4:5], v0 +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_v: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_bf8_f16 v0, v0 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[4:5], v0, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> %a) + store i16 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_pk_bf8_f16_s(<2 x half> inreg %a, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_s: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, s0 +; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2 +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_s: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, s0 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_s: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, s0 +; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2 +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_s: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, s0 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> %a) + store i16 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_pk_bf8_f16_l(ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_l: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, 0x56400000 +; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2 +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_l: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, 0x56400000 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_l: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, 0x56400000 +; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2 +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_l: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_bf8_f16 v2, 0x56400000 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> ) + store i16 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_pk_fp8_f16_v(<2 x half> %a, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_v: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v0.l, v0 +; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[2:3], v0 +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_v: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_fp8_f16 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_v: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2 +; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v0.l, v0 +; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[4:5], v0 +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_v: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_fp8_f16 v0, v0 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[4:5], v0, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> %a) + store i16 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_pk_fp8_f16_s(<2 x half> inreg %a, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_s: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, s0 +; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2 +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_s: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, s0 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_s: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, s0 +; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2 +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_s: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, s0 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> %a) + store i16 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_pk_fp8_f16_l(ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_l: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, 0x56400000 +; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2 +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_l: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, 0x56400000 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b16 v[0:1], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_l: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, 0x56400000 +; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2 +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_l: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_fp8_f16 v2, 0x56400000 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b16 v[0:1], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> ) + store i16 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_sr_bf8_f16_byte0(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte0: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 +; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte0: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte0: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 +; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte0: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 0) + store i32 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_sr_bf8_f16_byte1(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte1: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:1 +; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte1: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:1 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte1: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:1 +; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte1: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:1 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 1) + store i32 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_sr_bf8_f16_byte2(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte2: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:2 +; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte2: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:2 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte2: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:2 +; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte2: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:2 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 2) + store i32 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_sr_bf8_f16_byte3(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_byte3: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:3 +; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte3: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:3 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_byte3: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 byte_sel:3 +; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_byte3: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 byte_sel:3 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a, i32 %sr, i32 %old, i32 3) + store i32 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_sr_bf8_f16_hi_byte0(<2 x half> %a, i32 %sr, i32 %old, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_bf8_f16_hi_byte0: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.h, v1 +; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_bf8_f16_hi_byte0: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_lshrrev_b32 v0, 16, v0 +; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v4, v3 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_bf8_f16_hi_byte0: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3 +; GFX1250-GISEL-REAL16-NEXT: v_mov_b32_e32 v7, v4 +; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_bf8_f16 v2, v0.l, v1 +; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_bf8_f16_hi_byte0: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3 +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v7, v4 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_bf8_f16 v2, v0, v1 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %a.1 = extractelement <2 x half> %a, i32 1 + %cvt = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %a.1, i32 %sr, i32 %old, i32 0) + store i32 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_sr_fp8_f16_byte0(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte0: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 +; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte0: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte0: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 +; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte0: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 0) + store i32 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_sr_fp8_f16_byte1(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte1: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:1 +; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte1: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:1 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte1: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:1 +; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte1: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:1 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 1) + store i32 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_sr_fp8_f16_byte2(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte2: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:2 +; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte2: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:2 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte2: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:2 +; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte2: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:2 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 2) + store i32 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_sr_fp8_f16_byte3(half %a, i32 %sr, i32 %old, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_byte3: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:3 +; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte3: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:3 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_byte3: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 byte_sel:3 +; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_byte3: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_mov_b32 v6, v3 :: v_dual_mov_b32 v7, v4 +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 byte_sel:3 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a, i32 %sr, i32 %old, i32 3) + store i32 %cvt, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps void @test_cvt_sr_fp8_f16_hi_byte0(<2 x half> %a, i32 %sr, i32 %old, ptr addrspace(1) %out) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_sr_fp8_f16_hi_byte0: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 +; GFX1250-SDAG-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.h, v1 +; GFX1250-SDAG-REAL16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-REAL16-NEXT: s_endpgm +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_sr_fp8_f16_hi_byte0: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_lshrrev_b32 v0, 16, v0 +; GFX1250-SDAG-FAKE16-NEXT: v_mov_b32_e32 v4, v3 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 +; GFX1250-SDAG-FAKE16-NEXT: global_store_b32 v[4:5], v2, off +; GFX1250-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_sr_fp8_f16_hi_byte0: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3 +; GFX1250-GISEL-REAL16-NEXT: v_mov_b32_e32 v7, v4 +; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-GISEL-REAL16-NEXT: v_cvt_sr_fp8_f16 v2, v0.l, v1 +; GFX1250-GISEL-REAL16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-REAL16-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_sr_fp8_f16_hi_byte0: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_dual_lshrrev_b32 v0, 16, v0 :: v_dual_mov_b32 v6, v3 +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v7, v4 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_sr_fp8_f16 v2, v0, v1 +; GFX1250-GISEL-FAKE16-NEXT: global_store_b32 v[6:7], v2, off +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm + %a.1 = extractelement <2 x half> %a, i32 1 + %cvt = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %a.1, i32 %sr, i32 %old, i32 0) + store i32 %cvt, ptr addrspace(1) %out + ret void +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX1250: {{.*}} diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s index f3bf07e01250a..81fc477b19ac1 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s @@ -402,3 +402,148 @@ v_ashr_pk_u8_i32 v2, v4, v7, 12345 v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] // GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04] + +v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,0] +// GFX1250: v_cvt_pk_bf8_f16 v1, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,1] +// GFX1250: v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_bf8_f16 v1, v2 clamp +// GFX1250: v_cvt_pk_bf8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_bf8_f16 v1, s2 +// GFX1250: v_cvt_pk_bf8_f16 v1, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00] + +v_cvt_pk_bf8_f16 v1, 100.0 +// GFX1250: v_cvt_pk_bf8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00] + +// Inline constants are not supported by v_cvt_pk_bf8_f16 + +v_cvt_pk_bf8_f16 v1, 1 +// GFX1250: v_cvt_pk_bf8_f16 v1, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] + +v_cvt_pk_bf8_f16 v1, 0x3800 +// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_pk_bf8_f16 v1, 0.5 +// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_pk_bf8_f16 v1, 0x3118 +// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] + +v_cvt_pk_bf8_f16 v1, 0.15915494 +// GFX1250: v_cvt_pk_bf8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] + +v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,0] +// GFX1250: v_cvt_pk_fp8_f16 v1, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,1] +// GFX1250: v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_fp8_f16 v1, v2 clamp +// GFX1250: v_cvt_pk_fp8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_fp8_f16 v1, s2 +// GFX1250: v_cvt_pk_fp8_f16 v1, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00] + +v_cvt_pk_fp8_f16 v1, 100.0 +// GFX1250: v_cvt_pk_fp8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00] + +// Inline constants are not supported by v_cvt_pk_fp8_f16 + +v_cvt_pk_fp8_f16 v1, 1 +// GFX1250: v_cvt_pk_fp8_f16 v1, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] + +v_cvt_pk_fp8_f16 v1, 0x3800 +// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_pk_fp8_f16 v1, 0.5 +// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_pk_fp8_f16 v1, 0x3118 +// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] + +v_cvt_pk_fp8_f16 v1, 0.15915494 +// GFX1250: v_cvt_pk_fp8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] + +v_cvt_sr_bf8_f16 v1, v2, v3 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:0 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2, s3 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00] + +v_cvt_sr_bf8_f16 v1, v2, 0x1234 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00] + +v_cvt_sr_bf8_f16 v1, -v2, v3 +// GFX1250: v_cvt_sr_bf8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20] + +v_cvt_sr_bf8_f16 v1, |v2|, v3 +// GFX1250: v_cvt_sr_bf8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, |v2|, v3 op_sel:[1] +// GFX1250: v_cvt_sr_bf8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:1 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:2 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2, v3 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2, s3 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00] + +v_cvt_sr_fp8_f16 v1, v2, 0x1234 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00] + +v_cvt_sr_fp8_f16 v1, -v2, v3 +// GFX1250: v_cvt_sr_fp8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20] + +v_cvt_sr_fp8_f16 v1, |v2|, v3 +// GFX1250: v_cvt_sr_fp8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, |v2|, v3 op_sel:[1] +// GFX1250: v_cvt_sr_fp8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:1 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:2 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s index 5b8cbec47957c..209951d455a66 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s @@ -402,3 +402,148 @@ v_ashr_pk_u8_i32 v2, v4, v7, 12345 v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] // GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04] + +v_cvt_pk_bf8_f16 v1.l, v2 +// GFX1250: v_cvt_pk_bf8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_bf8_f16 v1.h, v2 +// GFX1250: v_cvt_pk_bf8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_bf8_f16 v0.l, v2 clamp +// GFX1250: v_cvt_pk_bf8_f16 v0.l, v2 clamp ; encoding: [0x00,0x80,0x73,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_bf8_f16 v1.l, s2 +// GFX1250: v_cvt_pk_bf8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00] + +v_cvt_pk_bf8_f16 v1.l, 100.0 +// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00] + +// Inline constants are not supported by v_cvt_pk_bf8_f16 + +v_cvt_pk_bf8_f16 v1.l, 1 +// GFX1250: v_cvt_pk_bf8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] + +v_cvt_pk_bf8_f16 v1.l, 0x3800 +// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_pk_bf8_f16 v1.l, 0.5 +// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_pk_bf8_f16 v1.l, 0x3118 +// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] + +v_cvt_pk_bf8_f16 v1.l, 0.15915494 +// GFX1250: v_cvt_pk_bf8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] + +v_cvt_pk_fp8_f16 v1.l, v2 +// GFX1250: v_cvt_pk_fp8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_fp8_f16 v1.h, v2 +// GFX1250: v_cvt_pk_fp8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_fp8_f16 v1.l, v2 clamp +// GFX1250: v_cvt_pk_fp8_f16 v1.l, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00] + +v_cvt_pk_fp8_f16 v1.l, s2 +// GFX1250: v_cvt_pk_fp8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00] + +v_cvt_pk_fp8_f16 v1.l, 100.0 +// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00] + +// Inline constants are not supported by v_cvt_pk_fp8_f16 + +v_cvt_pk_fp8_f16 v1.l, 1 +// GFX1250: v_cvt_pk_fp8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] + +v_cvt_pk_fp8_f16 v1.l, 0x3800 +// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_pk_fp8_f16 v1.l, 0.5 +// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_pk_fp8_f16 v1.l, 0x3118 +// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] + +v_cvt_pk_fp8_f16 v1.l, 0.15915494 +// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] + +v_cvt_sr_bf8_f16 v1, v2.l, v3 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2.h, v3 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:0 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2, s3 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00] + +v_cvt_sr_bf8_f16 v1, v2, 0x1234 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00] + +v_cvt_sr_bf8_f16 v1, -v2, v3 +// GFX1250: v_cvt_sr_bf8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20] + +v_cvt_sr_bf8_f16 v1, |v2.l|, v3 +// GFX1250: v_cvt_sr_bf8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, |v2.h|, v3 +// GFX1250: v_cvt_sr_bf8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:2 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:1 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:1 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:2 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:3 +// GFX1250: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2.l, v3 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2.h, v3 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2, s3 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00] + +v_cvt_sr_fp8_f16 v1, v2, 0x1234 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00] + +v_cvt_sr_fp8_f16 v1, -v2, v3 +// GFX1250: v_cvt_sr_fp8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20] + +v_cvt_sr_fp8_f16 v1, |v2.l|, v3 +// GFX1250: v_cvt_sr_fp8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, |v2.h|, v3 +// GFX1250: v_cvt_sr_fp8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:2 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:1 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:1 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:2 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:3 +// GFX1250: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s index 4f5be384380e3..c406890e33258 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s @@ -329,3 +329,67 @@ v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 // GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,0] quad_perm:[1,2,3,0] +// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 +// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,0] quad_perm:[1,2,3,0] +// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 +// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 quad_perm:[0,1,2,3] fi:1 +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] quad_perm:[0,1,2,3] fi:1 +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 quad_perm:[0,1,2,3] fi:1 +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] quad_perm:[0,1,2,3] fi:1 +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s index dc45c6a80e9f1..741d1a1c510ba 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s @@ -329,3 +329,67 @@ v_ashr_pk_u8_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf v_ashr_pk_u8_i32 v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 // GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf8_f16 v1.l, v2 quad_perm:[1,2,3,0] +// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 +// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16 v1.l, v2 quad_perm:[1,2,3,0] +// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 +// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2.l, v3 quad_perm:[0,1,2,3] fi:1 +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2.h, v3 quad_perm:[0,1,2,3] fi:1 +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:3 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2.l, v3 quad_perm:[0,1,2,3] fi:1 +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2.h, v3 quad_perm:[0,1,2,3] fi:1 +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:3 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s index 6afd0f599d956..8a3e7ada47bce 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s @@ -225,3 +225,67 @@ v_ashr_pk_u8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] v_ashr_pk_u8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf8_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] dpp8:[1,2,3,4,5,6,7,0] fi:1 +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] dpp8:[1,2,3,4,5,6,7,0] fi:1 +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s index 8c5b84231076e..f79b5c5b8b7ba 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s @@ -225,3 +225,67 @@ v_ashr_pk_u8_i32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] v_ashr_pk_u8_i32 v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, s3 op_sel:[0,0,0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x40,0x91,0xd6,0xea,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf8_f16 v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16 v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2.h, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_bf8_f16 v1, v2.h, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2.h, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_fp8_f16 v1, v2.h, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] +// GFX1250: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s index 73c79721f3b0e..301cfdd217b9f 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s @@ -96,3 +96,28 @@ v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp // GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // GFX125X-ERR-NEXT:{{^}}v_ashr_pk_u8_i32 v1, v2, v3, v4 clamp // GFX125X-ERR-NEXT:{{^}} ^ + +v_cvt_sr_bf8_f16 v1, v2, v3 clamp +// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_bf8_f16 v1, v2, v3 clamp +// GFX125X-ERR-NEXT:{{^}} ^ + +v_cvt_sr_bf8_f16 v1, v2, v3 mul:2 +// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_bf8_f16 v1, v2, v3 mul:2 +// GFX125X-ERR-NEXT:{{^}} ^ + +v_cvt_sr_fp8_f16 v1, v2, v3 clamp +// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_fp8_f16 v1, v2, v3 clamp +// GFX125X-ERR-NEXT:{{^}} ^ + +v_cvt_sr_fp8_f16 v1, v2, v3 mul:2 +// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_fp8_f16 v1, v2, v3 mul:2 +// GFX125X-ERR-NEXT:{{^}} ^ + +v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:4 +// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid byte_sel value. +// GFX125X-ERR-NEXT:{{^}}v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:4 +// GFX125X-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt index c62f888524e81..10ffc2caa18bf 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt @@ -422,6 +422,170 @@ 0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04 # GFX1250: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x91,0xd6,0x02,0x07,0x12,0x04] -## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -# GFX1250-FAKE16: {{.*}} -# GFX1250-REAL16: {{.*}} +0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, v2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x01,0x00,0x00] + +0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x73,0xd7,0x02,0x01,0x00,0x00] + +0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, v2 clamp ; encoding: [0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x73,0xd7,0x02,0x01,0x00,0x00] + +0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, s2 ; encoding: [0x01,0x00,0x73,0xd7,0x02,0x00,0x00,0x00] + +0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00] + +0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 1 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] + +0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_bf8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x73,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] + +0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, v2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x01,0x00,0x00] + +0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.h, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, v2 op_sel:[0,1] ; encoding: [0x01,0x40,0x72,0xd7,0x02,0x01,0x00,0x00] + +0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, v2 clamp ; encoding: [0x01,0x80,0x72,0xd7,0x02,0x01,0x00,0x00] + +0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, s2 ; encoding: [0x01,0x00,0x72,0xd7,0x02,0x00,0x00,0x00] + +0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 0x5640 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x40,0x56,0x00,0x00] + +0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] + +0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 0x3800 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_fp8_f16 v1.l, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16 v1, 0x3118 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x18,0x31,0x00,0x00] + +0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, -v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x20] + +0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x75,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00] + +0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x00,0x00] + +0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x75,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x75,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x75,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x75,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x75,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x75,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x75,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x74,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_bf8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x75,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, -v2.l, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, -v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x20] + +0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, 0x1234 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0xff,0x01,0x00,0x34,0x12,0x00,0x00] + +0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, s3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x00,0x00] + +0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x74,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x74,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x74,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x74,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, |v2.l|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, |v2|, v3 ; encoding: [0x01,0x01,0x74,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:1 ; encoding: [0x01,0x28,0x74,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:2 ; encoding: [0x01,0x48,0x74,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, v2, v3 op_sel:[1,0,0] byte_sel:3 ; encoding: [0x01,0x68,0x74,0xd7,0x02,0x07,0x02,0x00] + +0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00 +# GFX1250-REAL16: v_cvt_sr_fp8_f16 v1, |v2.h|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16 v1, |v2|, v3 op_sel:[1,0,0] ; encoding: [0x01,0x09,0x74,0xd7,0x02,0x07,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt index e57fa923f2ffe..e138425aee92c 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt @@ -269,3 +269,67 @@ 0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff # GFX1250: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] + +0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] + +0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53 +# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] + +0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] + +0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53 +# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] row_share:0 row_mask:0x5 bank_mask:0x3 fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xfa,0x00,0x00,0x00,0x02,0x50,0x05,0x53] + +0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] + +0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] + +0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] + +0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] + +0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] + +0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] + +0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] + +0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] + +0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] + +0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] + +0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] + +0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x68,0x74,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt index 25012f00e4947..c6bde2241fa4b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt @@ -187,3 +187,67 @@ 0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX1250: v_ashr_pk_u8_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x73,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_bf8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_bf8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x73,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x72,0xd7,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_fp8_f16_e64_dpp v1.h, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_fp8_f16_e64_dpp v1, v2 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x40,0x72,0xd7,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] + +0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x75,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] + +0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] + +0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] + +0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] + +0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_bf8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_bf8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x75,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] + +0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x00,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] + +0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] dpp8:[1,2,3,4,5,6,7,0] fi:1 ; encoding: [0x01,0x08,0x74,0xd7,0xea,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] + +0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x20,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] + +0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x40,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] + +0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x60,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] + +0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f +# GFX1250-REAL16: v_cvt_sr_fp8_f16_e64_dpp v1, v2.h, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f] +# GFX1250-FAKE16: v_cvt_sr_fp8_f16_e64_dpp v1, v2, v3 op_sel:[1,0,0] byte_sel:3 dpp8:[1,2,3,4,5,6,7,0] ; encoding: [0x01,0x68,0x74,0xd7,0xe9,0x06,0x02,0x00,0x02,0xd1,0x58,0x1f]