diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index ffe6b0649cb94..fef0d7eb45a8c 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -598,6 +598,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2 // encodings + if (isGFX1250() && Bytes.size() >= 16) { + DecoderUInt128 DecW = eat16Bytes(Bytes); + if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS)) + break; + Bytes = Bytes_.slice(0, MaxInstBytesNum); + } + if (isGFX11Plus() && Bytes.size() >= 12 ) { DecoderUInt128 DecW = eat12Bytes(Bytes); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index ffdac8b8ce324..fa0c95f54d9e7 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -75,8 +75,9 @@ unsigned AMDGPUMCAsmInfo::getMaxInstLength(const MCSubtargetInfo *STI) const { if (STI->hasFeature(AMDGPU::FeatureNSAEncoding)) return 20; - // VOP3PX encoding. - if (STI->hasFeature(AMDGPU::FeatureGFX950Insts)) + // VOP3PX/VOP3PX2 encoding. + if (STI->hasFeature(AMDGPU::FeatureGFX950Insts) || + STI->hasFeature(AMDGPU::FeatureGFX1250Insts)) return 16; // 64-bit instruction with 32-bit literal. diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 757fb37f37162..5d186c337a99c 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -1780,6 +1780,8 @@ multiclass WMMA_F8F6F4_Profiles; +defm F32_16X16X128_F8F6F4_SCALE : WMMA_F8F6F4_Profiles<1, 0, 1>; +defm F32_16X16X128_F8F6F4_SCALE16 : WMMA_F8F6F4_Profiles<1, 1, 1>; class VOP_WMMA_LD_SCALE : VOP3P_Profile> { let HasMatrixScale = 1; @@ -1844,7 +1846,8 @@ defm V_SWMMAC_F32_16X16X64_F16_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x64 defm V_SWMMAC_F16_16X16X64_F16_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x64_f16", F16_F16X64_SWMMAC_w32, "_w32">; defm V_WMMA_F32_16X16X128_F8F6F4 : WMMAInst_SrcFormats_mc<"v_wmma_f32_16x16x128_f8f6f4", "F32_16X16X128_F8F6F4">; - +defm V_WMMA_SCALE_F32_16X16X128_F8F6F4 : WMMAInst_SrcFormats_mc<"v_wmma_scale_f32_16x16x128_f8f6f4", "F32_16X16X128_F8F6F4_SCALE">; +defm V_WMMA_SCALE16_F32_16X16X128_F8F6F4 : WMMAInst_SrcFormats_mc<"v_wmma_scale16_f32_16x16x128_f8f6f4", "F32_16X16X128_F8F6F4_SCALE16">; } // End is_wmma_xdl = 1. defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3PInst<"v_wmma_ld_scale_paired_b32", VOP_WMMA_LD_SCALE>; @@ -2138,6 +2141,73 @@ multiclass VOP3P_Real_WMMA_gfx1250_SrcFormats op, string WMMAP> { } } +class VOP3PX2e op, bits<8> LdScaleOp, VOP3PWMMA_Profile P> : Enc128, VOP3Pe_Base { + bits<9> scale_src0; + bits<9> scale_src1; + + // Inst{7-0} = unused + let Inst{10-8} = {0, matrix_b_scale_fmt{1-0}}; // neg_hi + let Inst{11} = matrix_a_scale{0}; // scale_op_sel(0) + let Inst{12} = 0; // scale_op_sel(1) + let Inst{13} = matrix_a_reuse; // scale_op_sel(2) + let Inst{14} = matrix_b_reuse; // scale_op_sel_hi(2) + let Inst{15} = 0; // scale_clamp + let Inst{31-24} = 0xcc; // Encoding + let Inst{23-16} = LdScaleOp; + let Inst{40-32} = scale_src0; + let Inst{49-41} = scale_src1; + let Inst{58-50} = 0; // scale src2 + let Inst{59} = matrix_b_scale{0}; // scale_op_sel_hi(0) + let Inst{60} = 0; // scale_op_sel_hi(1) + let Inst{63-61} = {0, matrix_a_scale_fmt{1-0}}; // neg (lo) + + // The high half of the encoding is the unscaled wmma op. + let Inst{71-64} = vdst; + + let Inst{72} = !if(P.NegHi01, src0_modifiers{1}, 0); // neg_hi src0 + let Inst{73} = !if(P.NegHi01, src1_modifiers{1}, 0); // neg_hi src1 + let Inst{74} = !if(P.NegHi2, src2_modifiers{1}, 0); // neg_hi src2 + + let Inst{77-75} = !if(P.HasMatrixFMT, matrix_a_fmt{2-0}, 0); // op_sel + + let Inst{78,124,123} = !if(P.HasMatrixFMT, matrix_b_fmt{2-0}, 7); // op_sel_hi + let Inst{79} = !if(P.HasClamp, clamp{0}, 0); + + let Inst{87-80} = op; + let Inst{95-88} = 0xcc; //encoding + let Inst{104-96} = !if(P.HasSrc0, src0, 0); + let Inst{113-105} = !if(P.HasSrc1, src1, 0); + let Inst{122-114} = !if(P.HasSrc2, src2, 0); + + // neg_lo + let Inst{125} = !if(P.NegLo01, src0_modifiers{0}, 0); + let Inst{126} = !if(P.NegLo01, src1_modifiers{0}, 0); + let Inst{127} = !if(P.NegLo2, src2_modifiers{0}, 0); +} + +multiclass VOP3PX2_Real_ScaledWMMA op, bits<8> LdScaleOp, VOP3PWMMA_Profile WMMAP> { + defvar PS = !cast(NAME # "_twoaddr"); + defvar asmName = !substr(PS.Mnemonic, 0, !sub(!size(PS.Mnemonic), !size("_f8_f8_w32"))); + defvar psName = !substr(NAME, 0, !sub(!size(PS.Mnemonic), !size("_f8_f8_w32"))); + let SubtargetPredicate = isGFX1250Plus, WaveSizePredicate = isWave32, + DecoderNamespace = "GFX1250" in { + def _gfx1250 : VOP3P_Real_Gen, + VOP3PX2e , + MFMA_F8F6F4_WithSizeTable_Helper { + let AsmString = asmName # PS.AsmOperands; + } + } +} + +multiclass VOP3PX2_Real_ScaledWMMA_SrcFormats op, bits<8> LdScaleOp, string WMMAP> { + defm _f8_f8_w32 : VOP3PX2_Real_ScaledWMMA(WMMAP # "_f8_f8_w32")>; + foreach I = ["f8_f6", "f8_f4", "f6_f8", "f6_f6", "f6_f4", "f4_f8", "f4_f6", "f4_f4"] in { + let isAsmParserOnly = true in { // Disable ambiguous disassembly. + defm _#I#_w32 : VOP3PX2_Real_ScaledWMMA(WMMAP # "_" # I # "_w32")>; + } + } +} + defm V_WMMA_F32_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x040, F32_F16_WMMA_w32>; defm V_WMMA_F32_16X16X16_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x041, F32_BF16_WMMA_w32>; defm V_WMMA_F16_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x042, F16_F16_WMMA_w32>; @@ -2213,6 +2283,8 @@ defm V_WMMA_F16_16X16X128_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x087, F16_FP8B defm V_WMMA_F32_32X16X128_F4_w32 : VOP3P_Real_WMMA_gfx1250 <0x088, F32_32X16X128_F4_WMMA_w32>; defm V_WMMA_F32_16X16X128_F8F6F4 : VOP3P_Real_WMMA_gfx1250_SrcFormats<0x033, "F32_16X16X128_F8F6F4">; +defm V_WMMA_SCALE_F32_16X16X128_F8F6F4 : VOP3PX2_Real_ScaledWMMA_SrcFormats<0x033, 0x35, "F32_16X16X128_F8F6F4_SCALE">; +defm V_WMMA_SCALE16_F32_16X16X128_F8F6F4 : VOP3PX2_Real_ScaledWMMA_SrcFormats<0x033, 0x3a, "F32_16X16X128_F8F6F4_SCALE16">; defm V_SWMMAC_F32_16X16X64_F16_w32 : VOP3P_Real_WMMA_gfx1250 <0x065, F32_F16X64_SWMMAC_w32>; defm V_SWMMAC_F32_16X16X64_BF16_w32 : VOP3P_Real_WMMA_gfx1250 <0x066, F32_BF16X64_SWMMAC_w32>; diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll index 2ad7818bd3ca8..243f0ed3a8d0d 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll @@ -25,8 +25,11 @@ define amdgpu_kernel void @uniform_conditional_max_short_forward_branch(ptr addr ; GCN-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: s_cmp_eq_u32 s0, 0 -; GCN-NEXT: s_cbranch_scc1 .LBB0_2 -; GCN-NEXT: ; %bb.1: ; %bb2 +; GCN-NEXT: s_cbranch_scc0 .LBB0_1 +; GCN-NEXT: ; %bb.3: ; %bb +; GCN-NEXT: s_add_pc_i64 .LBB0_2-.Lpost_addpc0 +; GCN-NEXT: .Lpost_addpc0: +; GCN-NEXT: .LBB0_1: ; %bb2 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: v_nop_e64 ; GCN-NEXT: v_nop_e64 @@ -64,8 +67,8 @@ define amdgpu_kernel void @uniform_conditional_min_long_forward_branch(ptr addrs ; GCN-NEXT: s_cmp_eq_u32 s0, 0 ; GCN-NEXT: s_cbranch_scc0 .LBB1_1 ; GCN-NEXT: ; %bb.3: ; %bb0 -; GCN-NEXT: s_add_pc_i64 .LBB1_2-.Lpost_addpc0 -; GCN-NEXT: .Lpost_addpc0: +; GCN-NEXT: s_add_pc_i64 .LBB1_2-.Lpost_addpc1 +; GCN-NEXT: .Lpost_addpc1: ; GCN-NEXT: .LBB1_1: ; %bb2 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: v_nop_e64 @@ -106,8 +109,8 @@ define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(ptr ; GCN-NEXT: s_cmp_eq_f32 s0, 0 ; GCN-NEXT: s_cbranch_scc0 .LBB2_1 ; GCN-NEXT: ; %bb.3: ; %bb0 -; GCN-NEXT: s_add_pc_i64 .LBB2_2-.Lpost_addpc1 -; GCN-NEXT: .Lpost_addpc1: +; GCN-NEXT: s_add_pc_i64 .LBB2_2-.Lpost_addpc2 +; GCN-NEXT: .Lpost_addpc2: ; GCN-NEXT: .LBB2_1: ; %bb2 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; 32 bytes @@ -157,8 +160,8 @@ define amdgpu_kernel void @min_long_forward_vbranch(ptr addrspace(1) %arg) #0 { ; GCN-NEXT: v_cmpx_ne_u32_e32 0, v2 ; GCN-NEXT: s_cbranch_execnz .LBB3_1 ; GCN-NEXT: ; %bb.3: ; %bb -; GCN-NEXT: s_add_pc_i64 .LBB3_2-.Lpost_addpc2 -; GCN-NEXT: .Lpost_addpc2: +; GCN-NEXT: s_add_pc_i64 .LBB3_2-.Lpost_addpc3 +; GCN-NEXT: .Lpost_addpc3: ; GCN-NEXT: .LBB3_1: ; %bb2 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; 32 bytes @@ -209,8 +212,8 @@ define amdgpu_kernel void @long_backward_sbranch(ptr addrspace(1) %arg) #0 { ; GCN-NEXT: s_cbranch_scc0 .LBB4_2 ; GCN-NEXT: ; %bb.3: ; %bb2 ; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1 -; GCN-NEXT: s_add_pc_i64 .LBB4_1-.Lpost_addpc3 -; GCN-NEXT: .Lpost_addpc3: +; GCN-NEXT: s_add_pc_i64 .LBB4_1-.Lpost_addpc4 +; GCN-NEXT: .Lpost_addpc4: ; GCN-NEXT: .LBB4_2: ; %bb3 ; GCN-NEXT: s_endpgm bb: @@ -242,8 +245,8 @@ define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(ptr add ; GCN-NEXT: s_mov_b32 s0, -1 ; GCN-NEXT: s_cbranch_scc0 .LBB5_1 ; GCN-NEXT: ; %bb.7: ; %bb0 -; GCN-NEXT: s_add_pc_i64 .LBB5_4-.Lpost_addpc5 -; GCN-NEXT: .Lpost_addpc5: +; GCN-NEXT: s_add_pc_i64 .LBB5_4-.Lpost_addpc6 +; GCN-NEXT: .Lpost_addpc6: ; GCN-NEXT: .LBB5_1: ; %Flow ; GCN-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 ; GCN-NEXT: s_cbranch_vccnz .LBB5_3 @@ -268,11 +271,11 @@ define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(ptr add ; GCN-NEXT: s_sleep 0 ; GCN-NEXT: s_cbranch_execnz .LBB5_5 ; GCN-NEXT: ; %bb.9: ; %bb3 -; GCN-NEXT: s_add_pc_i64 .LBB5_2-.Lpost_addpc6 -; GCN-NEXT: .Lpost_addpc6: +; GCN-NEXT: s_add_pc_i64 .LBB5_2-.Lpost_addpc7 +; GCN-NEXT: .Lpost_addpc7: ; GCN-NEXT: .LBB5_5: ; %bb3 -; GCN-NEXT: s_add_pc_i64 .LBB5_3-.Lpost_addpc4 -; GCN-NEXT: .Lpost_addpc4: +; GCN-NEXT: s_add_pc_i64 .LBB5_3-.Lpost_addpc5 +; GCN-NEXT: .Lpost_addpc5: bb0: %tmp = icmp ne i32 %arg1, 0 br i1 %tmp, label %bb2, label %bb3 @@ -310,8 +313,8 @@ define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(ptr ad ; GCN-NEXT: s_cbranch_vccz .LBB6_2 ; GCN-NEXT: ; %bb.3: ; %loop ; GCN-NEXT: ; in Loop: Header=BB6_1 Depth=1 -; GCN-NEXT: s_add_pc_i64 .LBB6_1-.Lpost_addpc7 -; GCN-NEXT: .Lpost_addpc7: +; GCN-NEXT: s_add_pc_i64 .LBB6_1-.Lpost_addpc8 +; GCN-NEXT: .Lpost_addpc8: ; GCN-NEXT: .LBB6_2: ; %DummyReturnBlock ; GCN-NEXT: s_endpgm entry: @@ -350,8 +353,8 @@ define amdgpu_kernel void @expand_requires_expand(i32 %cond0) #0 { ; GCN-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 ; GCN-NEXT: s_cbranch_vccz .LBB7_3 ; GCN-NEXT: ; %bb.5: ; %Flow -; GCN-NEXT: s_add_pc_i64 .LBB7_4-.Lpost_addpc8 -; GCN-NEXT: .Lpost_addpc8: +; GCN-NEXT: s_add_pc_i64 .LBB7_4-.Lpost_addpc9 +; GCN-NEXT: .Lpost_addpc9: ; GCN-NEXT: .LBB7_3: ; %bb2 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: v_nop_e64 @@ -406,8 +409,8 @@ define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 % ; GCN-NEXT: v_cmpx_gt_u32_e32 16, v0 ; GCN-NEXT: s_cbranch_execnz .LBB8_1 ; GCN-NEXT: ; %bb.4: ; %entry -; GCN-NEXT: s_add_pc_i64 .LBB8_3-.Lpost_addpc9 -; GCN-NEXT: .Lpost_addpc9: +; GCN-NEXT: s_add_pc_i64 .LBB8_3-.Lpost_addpc10 +; GCN-NEXT: .Lpost_addpc10: ; GCN-NEXT: .LBB8_1: ; %if ; GCN-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 ; GCN-NEXT: v_mov_b32_e32 v0, 0 @@ -465,8 +468,8 @@ define amdgpu_kernel void @analyze_mask_branch() #0 { ; GCN-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GCN-NEXT: s_cbranch_execnz .LBB9_3 ; GCN-NEXT: ; %bb.6: ; %Flow1 -; GCN-NEXT: s_add_pc_i64 .LBB9_5-.Lpost_addpc10 -; GCN-NEXT: .Lpost_addpc10: +; GCN-NEXT: s_add_pc_i64 .LBB9_5-.Lpost_addpc11 +; GCN-NEXT: .Lpost_addpc11: ; GCN-NEXT: .LBB9_3: ; %loop.preheader ; GCN-NEXT: s_mov_b32 vcc_lo, 0 ; GCN-NEXT: .LBB9_4: ; %loop @@ -484,8 +487,8 @@ define amdgpu_kernel void @analyze_mask_branch() #0 { ; GCN-NEXT: s_cbranch_vccnz .LBB9_5 ; GCN-NEXT: ; %bb.8: ; %loop ; GCN-NEXT: ; in Loop: Header=BB9_4 Depth=1 -; GCN-NEXT: s_add_pc_i64 .LBB9_4-.Lpost_addpc11 -; GCN-NEXT: .Lpost_addpc11: +; GCN-NEXT: s_add_pc_i64 .LBB9_4-.Lpost_addpc12 +; GCN-NEXT: .Lpost_addpc12: ; GCN-NEXT: .LBB9_5: ; %UnifiedReturnBlock ; GCN-NEXT: s_endpgm entry: @@ -528,20 +531,20 @@ define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32 ; GCN-NEXT: s_cmp_lt_i32 s3, 6 ; GCN-NEXT: s_cbranch_scc0 .LBB10_1 ; GCN-NEXT: ; %bb.10: ; %bb -; GCN-NEXT: s_add_pc_i64 .LBB10_4-.Lpost_addpc13 -; GCN-NEXT: .Lpost_addpc13: +; GCN-NEXT: s_add_pc_i64 .LBB10_4-.Lpost_addpc14 +; GCN-NEXT: .Lpost_addpc14: ; GCN-NEXT: .LBB10_1: ; %Flow ; GCN-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s7 ; GCN-NEXT: s_cbranch_vccnz .LBB10_2 ; GCN-NEXT: ; %bb.12: ; %Flow -; GCN-NEXT: s_add_pc_i64 .LBB10_5-.Lpost_addpc14 -; GCN-NEXT: .Lpost_addpc14: +; GCN-NEXT: s_add_pc_i64 .LBB10_5-.Lpost_addpc15 +; GCN-NEXT: .Lpost_addpc15: ; GCN-NEXT: .LBB10_2: ; %Flow5 ; GCN-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 ; GCN-NEXT: s_cbranch_vccz .LBB10_3 ; GCN-NEXT: ; %bb.14: ; %Flow5 -; GCN-NEXT: s_add_pc_i64 .LBB10_6-.Lpost_addpc15 -; GCN-NEXT: .Lpost_addpc15: +; GCN-NEXT: s_add_pc_i64 .LBB10_6-.Lpost_addpc16 +; GCN-NEXT: .Lpost_addpc16: ; GCN-NEXT: .LBB10_3: ; %bb14 ; GCN-NEXT: s_cmp_lt_i32 s1, 9 ; GCN-NEXT: s_cselect_b32 s0, -1, 0 @@ -553,8 +556,8 @@ define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32 ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GCN-NEXT: ; %bb.8: ; %bb14 -; GCN-NEXT: s_add_pc_i64 .LBB10_7-.Lpost_addpc12 -; GCN-NEXT: .Lpost_addpc12: +; GCN-NEXT: s_add_pc_i64 .LBB10_7-.Lpost_addpc13 +; GCN-NEXT: .Lpost_addpc13: ; GCN-NEXT: .LBB10_4: ; %bb13 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: v_nop_e64 @@ -565,8 +568,8 @@ define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32 ; GCN-NEXT: s_sleep 0 ; GCN-NEXT: s_cbranch_execz .LBB10_5 ; GCN-NEXT: ; %bb.16: ; %bb13 -; GCN-NEXT: s_add_pc_i64 .LBB10_2-.Lpost_addpc16 -; GCN-NEXT: .Lpost_addpc16: +; GCN-NEXT: s_add_pc_i64 .LBB10_2-.Lpost_addpc17 +; GCN-NEXT: .Lpost_addpc17: ; GCN-NEXT: .LBB10_5: ; %bb9 ; GCN-NEXT: s_cmp_lt_i32 s3, 11 ; GCN-NEXT: s_cselect_b32 s0, -1, 0 @@ -577,8 +580,8 @@ define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32 ; GCN-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 ; GCN-NEXT: s_cbranch_vccnz .LBB10_6 ; GCN-NEXT: ; %bb.18: ; %bb9 -; GCN-NEXT: s_add_pc_i64 .LBB10_3-.Lpost_addpc17 -; GCN-NEXT: .Lpost_addpc17: +; GCN-NEXT: s_add_pc_i64 .LBB10_3-.Lpost_addpc18 +; GCN-NEXT: .Lpost_addpc18: ; GCN-NEXT: .LBB10_6: ; GCN-NEXT: ; implicit-def: $vgpr0 ; GCN-NEXT: .LBB10_7: ; %bb19 diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s index 309c74ae7ff7a..93e65d3444b89 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s @@ -983,6 +983,11 @@ v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] neg_lo:[0,0,1] // WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 // GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] neg_hi:[0,0,1] +// GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] neg_hi:[0,0,1] ; encoding: [0x00,0x04,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + v_wmma_ld_scale_paired_b32 v1, v2 // GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00] // WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 @@ -1153,8 +1158,273 @@ v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 mat // WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 // GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU -v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] neg_hi:[0,0,1] -// GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] neg_hi:[0,0,1] ; encoding: [0x00,0x04,0x33,0xcc,0x08,0x31,0xa2,0x04] +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP8 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP8 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW0 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW0 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP8 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP8 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW0 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW0 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 +// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU + +v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] // WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 // GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt index 755a2a33cdcc7..2216348fa43c3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt @@ -676,6 +676,144 @@ 0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00 # GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] + +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] + +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] + +0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] + +0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] + +0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] + +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] + +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] + +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] + +0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] + +0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] + +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] + +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] + +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] + +0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] + +0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] + +0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] + +0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] + +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] + +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] + +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] + +0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] + +0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] + 0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b # GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b]