Skip to content

Commit 37fe9f6

Browse files
authored
[AMDGPU] Add gfx1250 v_wmma_scale[16]_f32_16x16x128_f8f6f4 MC support (#152014)
This adds the new VOP3PX2e encoding.
1 parent 58aeb79 commit 37fe9f6

File tree

6 files changed

+534
-43
lines changed

6 files changed

+534
-43
lines changed

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
598598

599599
// Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
600600
// encodings
601+
if (isGFX1250() && Bytes.size() >= 16) {
602+
DecoderUInt128 DecW = eat16Bytes(Bytes);
603+
if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
604+
break;
605+
Bytes = Bytes_.slice(0, MaxInstBytesNum);
606+
}
607+
601608
if (isGFX11Plus() && Bytes.size() >= 12 ) {
602609
DecoderUInt128 DecW = eat12Bytes(Bytes);
603610

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ unsigned AMDGPUMCAsmInfo::getMaxInstLength(const MCSubtargetInfo *STI) const {
7575
if (STI->hasFeature(AMDGPU::FeatureNSAEncoding))
7676
return 20;
7777

78-
// VOP3PX encoding.
79-
if (STI->hasFeature(AMDGPU::FeatureGFX950Insts))
78+
// VOP3PX/VOP3PX2 encoding.
79+
if (STI->hasFeature(AMDGPU::FeatureGFX950Insts) ||
80+
STI->hasFeature(AMDGPU::FeatureGFX1250Insts))
8081
return 16;
8182

8283
// 64-bit instruction with 32-bit literal.

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1780,6 +1780,8 @@ multiclass WMMA_F8F6F4_Profiles<bit HasMatrixScale, bit Scale16, bit HasMatrixRe
17801780
}
17811781

17821782
defm F32_16X16X128_F8F6F4 : WMMA_F8F6F4_Profiles<0, 0, 0>;
1783+
defm F32_16X16X128_F8F6F4_SCALE : WMMA_F8F6F4_Profiles<1, 0, 1>;
1784+
defm F32_16X16X128_F8F6F4_SCALE16 : WMMA_F8F6F4_Profiles<1, 1, 1>;
17831785

17841786
class VOP_WMMA_LD_SCALE<ValueType vt, RegisterOperand RC> : VOP3P_Profile<VOPProfile<[untyped, vt, vt, untyped]>> {
17851787
let HasMatrixScale = 1;
@@ -1844,7 +1846,8 @@ defm V_SWMMAC_F32_16X16X64_F16_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x64
18441846
defm V_SWMMAC_F16_16X16X64_F16_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x64_f16", F16_F16X64_SWMMAC_w32, "_w32">;
18451847

18461848
defm V_WMMA_F32_16X16X128_F8F6F4 : WMMAInst_SrcFormats_mc<"v_wmma_f32_16x16x128_f8f6f4", "F32_16X16X128_F8F6F4">;
1847-
1849+
defm V_WMMA_SCALE_F32_16X16X128_F8F6F4 : WMMAInst_SrcFormats_mc<"v_wmma_scale_f32_16x16x128_f8f6f4", "F32_16X16X128_F8F6F4_SCALE">;
1850+
defm V_WMMA_SCALE16_F32_16X16X128_F8F6F4 : WMMAInst_SrcFormats_mc<"v_wmma_scale16_f32_16x16x128_f8f6f4", "F32_16X16X128_F8F6F4_SCALE16">;
18481851
} // End is_wmma_xdl = 1.
18491852

18501853
defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3PInst<"v_wmma_ld_scale_paired_b32", VOP_WMMA_LD_SCALE<i32, VCSrc_b32>>;
@@ -2138,6 +2141,73 @@ multiclass VOP3P_Real_WMMA_gfx1250_SrcFormats<bits<8> op, string WMMAP> {
21382141
}
21392142
}
21402143

2144+
// 128-bit (Enc128) encoding used by the scaled WMMA real instructions.
// Bits 63-0 hold the scale operands (scale_src0/scale_src1), the matrix scale
// and scale-format fields, the matrix reuse flags and the LdScaleOp opcode.
// Bits 127-64 hold the unscaled WMMA operation selected by op, with its
// vdst/src0/src1/src2 operands and modifiers.
// Both halves carry 0xcc in their encoding byte (Inst{31-24} and Inst{95-88}).
class VOP3PX2e <bits<8> op, bits<8> LdScaleOp, VOP3PWMMA_Profile P> : Enc128, VOP3Pe_Base {
2145+
bits<9> scale_src0;
2146+
bits<9> scale_src1;
2147+
2148+
// Inst{7-0} = unused
2149+
let Inst{10-8} = {0, matrix_b_scale_fmt{1-0}}; // neg_hi
2150+
let Inst{11} = matrix_a_scale{0}; // scale_op_sel(0)
2151+
let Inst{12} = 0; // scale_op_sel(1)
2152+
let Inst{13} = matrix_a_reuse; // scale_op_sel(2)
2153+
let Inst{14} = matrix_b_reuse; // scale_op_sel_hi(2)
2154+
let Inst{15} = 0; // scale_clamp
2155+
let Inst{31-24} = 0xcc; // Encoding
2156+
let Inst{23-16} = LdScaleOp;
2157+
let Inst{40-32} = scale_src0;
2158+
let Inst{49-41} = scale_src1;
2159+
let Inst{58-50} = 0; // scale src2
2160+
let Inst{59} = matrix_b_scale{0}; // scale_op_sel_hi(0)
2161+
let Inst{60} = 0; // scale_op_sel_hi(1)
2162+
let Inst{63-61} = {0, matrix_a_scale_fmt{1-0}}; // neg (lo)
2163+
2164+
// The high half of the encoding is the unscaled wmma op.
2165+
let Inst{71-64} = vdst;
2166+
2167+
// Each modifier bit below is encoded only when the profile P enables it;
// otherwise the bit is forced to 0.
let Inst{72} = !if(P.NegHi01, src0_modifiers{1}, 0); // neg_hi src0
2168+
let Inst{73} = !if(P.NegHi01, src1_modifiers{1}, 0); // neg_hi src1
2169+
let Inst{74} = !if(P.NegHi2, src2_modifiers{1}, 0); // neg_hi src2
2170+
2171+
let Inst{77-75} = !if(P.HasMatrixFMT, matrix_a_fmt{2-0}, 0); // op_sel
2172+
2173+
// matrix_b_fmt is split across the non-contiguous bits 78, 124 and 123; all
// three bits are set (value 7) when the profile has no matrix format.
let Inst{78,124,123} = !if(P.HasMatrixFMT, matrix_b_fmt{2-0}, 7); // op_sel_hi
2174+
let Inst{79} = !if(P.HasClamp, clamp{0}, 0);
2175+
2176+
let Inst{87-80} = op;
2177+
let Inst{95-88} = 0xcc; // Encoding
2178+
let Inst{104-96} = !if(P.HasSrc0, src0, 0);
2179+
let Inst{113-105} = !if(P.HasSrc1, src1, 0);
2180+
let Inst{122-114} = !if(P.HasSrc2, src2, 0);
2181+
2182+
// neg_lo
2183+
let Inst{125} = !if(P.NegLo01, src0_modifiers{0}, 0);
2184+
let Inst{126} = !if(P.NegLo01, src1_modifiers{0}, 0);
2185+
let Inst{127} = !if(P.NegLo2, src2_modifiers{0}, 0);
2186+
}
2187+
2188+
// Defines the _gfx1250 real instruction for one scaled WMMA pseudo, using the
// VOP3PX2e encoding with the given op / LdScaleOp opcodes and profile WMMAP.
// The pseudo is looked up as NAME # "_twoaddr".
multiclass VOP3PX2_Real_ScaledWMMA<bits<8> op, bits<8> LdScaleOp, VOP3PWMMA_Profile WMMAP> {
2189+
defvar PS = !cast<VOP3P_Pseudo>(NAME # "_twoaddr");
2190+
// asmName is the pseudo's mnemonic with its last !size("_f8_f8_w32")
// characters removed.
defvar asmName = !substr(PS.Mnemonic, 0, !sub(!size(PS.Mnemonic), !size("_f8_f8_w32")));
2191+
// psName is the prefix of NAME of that same length.
// NOTE(review): the length is computed from PS.Mnemonic, not from NAME, so
// this presumably relies on both strings having equal length - confirm.
defvar psName = !substr(NAME, 0, !sub(!size(PS.Mnemonic), !size("_f8_f8_w32")));
2192+
let SubtargetPredicate = isGFX1250Plus, WaveSizePredicate = isWave32,
2193+
DecoderNamespace = "GFX1250" in {
2194+
def _gfx1250 : VOP3P_Real_Gen<PS, GFX1250Gen, asmName>,
2195+
VOP3PX2e <op, LdScaleOp, WMMAP>,
2196+
// The helper is always given the name built from the "_f8_f8_w32_gfx1250"
// suffix, regardless of which source-format variant is being defined.
MFMA_F8F6F4_WithSizeTable_Helper<PS, psName # "_f8_f8_w32_gfx1250"> {
2197+
let AsmString = asmName # PS.AsmOperands;
2198+
}
2199+
}
2200+
}
2201+
2202+
// Instantiates VOP3PX2_Real_ScaledWMMA once per source-format pair, casting
// WMMAP # "_<fmt>_w32" to the matching VOP3PWMMA_Profile. The f8_f8 variant is
// defined normally; the eight other format pairs are defined as
// isAsmParserOnly.
multiclass VOP3PX2_Real_ScaledWMMA_SrcFormats<bits<8> op, bits<8> LdScaleOp, string WMMAP> {
2203+
defm _f8_f8_w32 : VOP3PX2_Real_ScaledWMMA<op, LdScaleOp, !cast<VOP3PWMMA_Profile>(WMMAP # "_f8_f8_w32")>;
2204+
foreach I = ["f8_f6", "f8_f4", "f6_f8", "f6_f6", "f6_f4", "f4_f8", "f4_f6", "f4_f4"] in {
2205+
let isAsmParserOnly = true in { // Disable ambiguous disassembly.
2206+
defm _#I#_w32 : VOP3PX2_Real_ScaledWMMA<op, LdScaleOp, !cast<VOP3PWMMA_Profile>(WMMAP # "_" # I # "_w32")>;
2207+
}
2208+
}
2209+
}
2210+
21412211
defm V_WMMA_F32_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x040, F32_F16_WMMA_w32>;
21422212
defm V_WMMA_F32_16X16X16_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x041, F32_BF16_WMMA_w32>;
21432213
defm V_WMMA_F16_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x042, F16_F16_WMMA_w32>;
@@ -2213,6 +2283,8 @@ defm V_WMMA_F16_16X16X128_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1250 <0x087, F16_FP8B
22132283
defm V_WMMA_F32_32X16X128_F4_w32 : VOP3P_Real_WMMA_gfx1250 <0x088, F32_32X16X128_F4_WMMA_w32>;
22142284

22152285
defm V_WMMA_F32_16X16X128_F8F6F4 : VOP3P_Real_WMMA_gfx1250_SrcFormats<0x033, "F32_16X16X128_F8F6F4">;
2286+
defm V_WMMA_SCALE_F32_16X16X128_F8F6F4 : VOP3PX2_Real_ScaledWMMA_SrcFormats<0x033, 0x35, "F32_16X16X128_F8F6F4_SCALE">;
2287+
defm V_WMMA_SCALE16_F32_16X16X128_F8F6F4 : VOP3PX2_Real_ScaledWMMA_SrcFormats<0x033, 0x3a, "F32_16X16X128_F8F6F4_SCALE16">;
22162288

22172289
defm V_SWMMAC_F32_16X16X64_F16_w32 : VOP3P_Real_WMMA_gfx1250 <0x065, F32_F16X64_SWMMAC_w32>;
22182290
defm V_SWMMAC_F32_16X16X64_BF16_w32 : VOP3P_Real_WMMA_gfx1250 <0x066, F32_BF16X64_SWMMAC_w32>;

llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll

Lines changed: 41 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,11 @@ define amdgpu_kernel void @uniform_conditional_max_short_forward_branch(ptr addr
2525
; GCN-NEXT: s_load_b32 s0, s[4:5], 0x2c
2626
; GCN-NEXT: s_wait_kmcnt 0x0
2727
; GCN-NEXT: s_cmp_eq_u32 s0, 0
28-
; GCN-NEXT: s_cbranch_scc1 .LBB0_2
29-
; GCN-NEXT: ; %bb.1: ; %bb2
28+
; GCN-NEXT: s_cbranch_scc0 .LBB0_1
29+
; GCN-NEXT: ; %bb.3: ; %bb
30+
; GCN-NEXT: s_add_pc_i64 .LBB0_2-.Lpost_addpc0
31+
; GCN-NEXT: .Lpost_addpc0:
32+
; GCN-NEXT: .LBB0_1: ; %bb2
3033
; GCN-NEXT: ;;#ASMSTART
3134
; GCN-NEXT: v_nop_e64
3235
; GCN-NEXT: v_nop_e64
@@ -64,8 +67,8 @@ define amdgpu_kernel void @uniform_conditional_min_long_forward_branch(ptr addrs
6467
; GCN-NEXT: s_cmp_eq_u32 s0, 0
6568
; GCN-NEXT: s_cbranch_scc0 .LBB1_1
6669
; GCN-NEXT: ; %bb.3: ; %bb0
67-
; GCN-NEXT: s_add_pc_i64 .LBB1_2-.Lpost_addpc0
68-
; GCN-NEXT: .Lpost_addpc0:
70+
; GCN-NEXT: s_add_pc_i64 .LBB1_2-.Lpost_addpc1
71+
; GCN-NEXT: .Lpost_addpc1:
6972
; GCN-NEXT: .LBB1_1: ; %bb2
7073
; GCN-NEXT: ;;#ASMSTART
7174
; GCN-NEXT: v_nop_e64
@@ -106,8 +109,8 @@ define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(ptr
106109
; GCN-NEXT: s_cmp_eq_f32 s0, 0
107110
; GCN-NEXT: s_cbranch_scc0 .LBB2_1
108111
; GCN-NEXT: ; %bb.3: ; %bb0
109-
; GCN-NEXT: s_add_pc_i64 .LBB2_2-.Lpost_addpc1
110-
; GCN-NEXT: .Lpost_addpc1:
112+
; GCN-NEXT: s_add_pc_i64 .LBB2_2-.Lpost_addpc2
113+
; GCN-NEXT: .Lpost_addpc2:
111114
; GCN-NEXT: .LBB2_1: ; %bb2
112115
; GCN-NEXT: ;;#ASMSTART
113116
; GCN-NEXT: ; 32 bytes
@@ -157,8 +160,8 @@ define amdgpu_kernel void @min_long_forward_vbranch(ptr addrspace(1) %arg) #0 {
157160
; GCN-NEXT: v_cmpx_ne_u32_e32 0, v2
158161
; GCN-NEXT: s_cbranch_execnz .LBB3_1
159162
; GCN-NEXT: ; %bb.3: ; %bb
160-
; GCN-NEXT: s_add_pc_i64 .LBB3_2-.Lpost_addpc2
161-
; GCN-NEXT: .Lpost_addpc2:
163+
; GCN-NEXT: s_add_pc_i64 .LBB3_2-.Lpost_addpc3
164+
; GCN-NEXT: .Lpost_addpc3:
162165
; GCN-NEXT: .LBB3_1: ; %bb2
163166
; GCN-NEXT: ;;#ASMSTART
164167
; GCN-NEXT: ; 32 bytes
@@ -209,8 +212,8 @@ define amdgpu_kernel void @long_backward_sbranch(ptr addrspace(1) %arg) #0 {
209212
; GCN-NEXT: s_cbranch_scc0 .LBB4_2
210213
; GCN-NEXT: ; %bb.3: ; %bb2
211214
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
212-
; GCN-NEXT: s_add_pc_i64 .LBB4_1-.Lpost_addpc3
213-
; GCN-NEXT: .Lpost_addpc3:
215+
; GCN-NEXT: s_add_pc_i64 .LBB4_1-.Lpost_addpc4
216+
; GCN-NEXT: .Lpost_addpc4:
214217
; GCN-NEXT: .LBB4_2: ; %bb3
215218
; GCN-NEXT: s_endpgm
216219
bb:
@@ -242,8 +245,8 @@ define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(ptr add
242245
; GCN-NEXT: s_mov_b32 s0, -1
243246
; GCN-NEXT: s_cbranch_scc0 .LBB5_1
244247
; GCN-NEXT: ; %bb.7: ; %bb0
245-
; GCN-NEXT: s_add_pc_i64 .LBB5_4-.Lpost_addpc5
246-
; GCN-NEXT: .Lpost_addpc5:
248+
; GCN-NEXT: s_add_pc_i64 .LBB5_4-.Lpost_addpc6
249+
; GCN-NEXT: .Lpost_addpc6:
247250
; GCN-NEXT: .LBB5_1: ; %Flow
248251
; GCN-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0
249252
; GCN-NEXT: s_cbranch_vccnz .LBB5_3
@@ -268,11 +271,11 @@ define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(ptr add
268271
; GCN-NEXT: s_sleep 0
269272
; GCN-NEXT: s_cbranch_execnz .LBB5_5
270273
; GCN-NEXT: ; %bb.9: ; %bb3
271-
; GCN-NEXT: s_add_pc_i64 .LBB5_2-.Lpost_addpc6
272-
; GCN-NEXT: .Lpost_addpc6:
274+
; GCN-NEXT: s_add_pc_i64 .LBB5_2-.Lpost_addpc7
275+
; GCN-NEXT: .Lpost_addpc7:
273276
; GCN-NEXT: .LBB5_5: ; %bb3
274-
; GCN-NEXT: s_add_pc_i64 .LBB5_3-.Lpost_addpc4
275-
; GCN-NEXT: .Lpost_addpc4:
277+
; GCN-NEXT: s_add_pc_i64 .LBB5_3-.Lpost_addpc5
278+
; GCN-NEXT: .Lpost_addpc5:
276279
bb0:
277280
%tmp = icmp ne i32 %arg1, 0
278281
br i1 %tmp, label %bb2, label %bb3
@@ -310,8 +313,8 @@ define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(ptr ad
310313
; GCN-NEXT: s_cbranch_vccz .LBB6_2
311314
; GCN-NEXT: ; %bb.3: ; %loop
312315
; GCN-NEXT: ; in Loop: Header=BB6_1 Depth=1
313-
; GCN-NEXT: s_add_pc_i64 .LBB6_1-.Lpost_addpc7
314-
; GCN-NEXT: .Lpost_addpc7:
316+
; GCN-NEXT: s_add_pc_i64 .LBB6_1-.Lpost_addpc8
317+
; GCN-NEXT: .Lpost_addpc8:
315318
; GCN-NEXT: .LBB6_2: ; %DummyReturnBlock
316319
; GCN-NEXT: s_endpgm
317320
entry:
@@ -350,8 +353,8 @@ define amdgpu_kernel void @expand_requires_expand(i32 %cond0) #0 {
350353
; GCN-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0
351354
; GCN-NEXT: s_cbranch_vccz .LBB7_3
352355
; GCN-NEXT: ; %bb.5: ; %Flow
353-
; GCN-NEXT: s_add_pc_i64 .LBB7_4-.Lpost_addpc8
354-
; GCN-NEXT: .Lpost_addpc8:
356+
; GCN-NEXT: s_add_pc_i64 .LBB7_4-.Lpost_addpc9
357+
; GCN-NEXT: .Lpost_addpc9:
355358
; GCN-NEXT: .LBB7_3: ; %bb2
356359
; GCN-NEXT: ;;#ASMSTART
357360
; GCN-NEXT: v_nop_e64
@@ -406,8 +409,8 @@ define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %
406409
; GCN-NEXT: v_cmpx_gt_u32_e32 16, v0
407410
; GCN-NEXT: s_cbranch_execnz .LBB8_1
408411
; GCN-NEXT: ; %bb.4: ; %entry
409-
; GCN-NEXT: s_add_pc_i64 .LBB8_3-.Lpost_addpc9
410-
; GCN-NEXT: .Lpost_addpc9:
412+
; GCN-NEXT: s_add_pc_i64 .LBB8_3-.Lpost_addpc10
413+
; GCN-NEXT: .Lpost_addpc10:
411414
; GCN-NEXT: .LBB8_1: ; %if
412415
; GCN-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
413416
; GCN-NEXT: v_mov_b32_e32 v0, 0
@@ -465,8 +468,8 @@ define amdgpu_kernel void @analyze_mask_branch() #0 {
465468
; GCN-NEXT: s_and_not1_saveexec_b32 s0, s0
466469
; GCN-NEXT: s_cbranch_execnz .LBB9_3
467470
; GCN-NEXT: ; %bb.6: ; %Flow1
468-
; GCN-NEXT: s_add_pc_i64 .LBB9_5-.Lpost_addpc10
469-
; GCN-NEXT: .Lpost_addpc10:
471+
; GCN-NEXT: s_add_pc_i64 .LBB9_5-.Lpost_addpc11
472+
; GCN-NEXT: .Lpost_addpc11:
470473
; GCN-NEXT: .LBB9_3: ; %loop.preheader
471474
; GCN-NEXT: s_mov_b32 vcc_lo, 0
472475
; GCN-NEXT: .LBB9_4: ; %loop
@@ -484,8 +487,8 @@ define amdgpu_kernel void @analyze_mask_branch() #0 {
484487
; GCN-NEXT: s_cbranch_vccnz .LBB9_5
485488
; GCN-NEXT: ; %bb.8: ; %loop
486489
; GCN-NEXT: ; in Loop: Header=BB9_4 Depth=1
487-
; GCN-NEXT: s_add_pc_i64 .LBB9_4-.Lpost_addpc11
488-
; GCN-NEXT: .Lpost_addpc11:
490+
; GCN-NEXT: s_add_pc_i64 .LBB9_4-.Lpost_addpc12
491+
; GCN-NEXT: .Lpost_addpc12:
489492
; GCN-NEXT: .LBB9_5: ; %UnifiedReturnBlock
490493
; GCN-NEXT: s_endpgm
491494
entry:
@@ -528,20 +531,20 @@ define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32
528531
; GCN-NEXT: s_cmp_lt_i32 s3, 6
529532
; GCN-NEXT: s_cbranch_scc0 .LBB10_1
530533
; GCN-NEXT: ; %bb.10: ; %bb
531-
; GCN-NEXT: s_add_pc_i64 .LBB10_4-.Lpost_addpc13
532-
; GCN-NEXT: .Lpost_addpc13:
534+
; GCN-NEXT: s_add_pc_i64 .LBB10_4-.Lpost_addpc14
535+
; GCN-NEXT: .Lpost_addpc14:
533536
; GCN-NEXT: .LBB10_1: ; %Flow
534537
; GCN-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s7
535538
; GCN-NEXT: s_cbranch_vccnz .LBB10_2
536539
; GCN-NEXT: ; %bb.12: ; %Flow
537-
; GCN-NEXT: s_add_pc_i64 .LBB10_5-.Lpost_addpc14
538-
; GCN-NEXT: .Lpost_addpc14:
540+
; GCN-NEXT: s_add_pc_i64 .LBB10_5-.Lpost_addpc15
541+
; GCN-NEXT: .Lpost_addpc15:
539542
; GCN-NEXT: .LBB10_2: ; %Flow5
540543
; GCN-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0
541544
; GCN-NEXT: s_cbranch_vccz .LBB10_3
542545
; GCN-NEXT: ; %bb.14: ; %Flow5
543-
; GCN-NEXT: s_add_pc_i64 .LBB10_6-.Lpost_addpc15
544-
; GCN-NEXT: .Lpost_addpc15:
546+
; GCN-NEXT: s_add_pc_i64 .LBB10_6-.Lpost_addpc16
547+
; GCN-NEXT: .Lpost_addpc16:
545548
; GCN-NEXT: .LBB10_3: ; %bb14
546549
; GCN-NEXT: s_cmp_lt_i32 s1, 9
547550
; GCN-NEXT: s_cselect_b32 s0, -1, 0
@@ -553,8 +556,8 @@ define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32
553556
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
554557
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
555558
; GCN-NEXT: ; %bb.8: ; %bb14
556-
; GCN-NEXT: s_add_pc_i64 .LBB10_7-.Lpost_addpc12
557-
; GCN-NEXT: .Lpost_addpc12:
559+
; GCN-NEXT: s_add_pc_i64 .LBB10_7-.Lpost_addpc13
560+
; GCN-NEXT: .Lpost_addpc13:
558561
; GCN-NEXT: .LBB10_4: ; %bb13
559562
; GCN-NEXT: ;;#ASMSTART
560563
; GCN-NEXT: v_nop_e64
@@ -565,8 +568,8 @@ define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32
565568
; GCN-NEXT: s_sleep 0
566569
; GCN-NEXT: s_cbranch_execz .LBB10_5
567570
; GCN-NEXT: ; %bb.16: ; %bb13
568-
; GCN-NEXT: s_add_pc_i64 .LBB10_2-.Lpost_addpc16
569-
; GCN-NEXT: .Lpost_addpc16:
571+
; GCN-NEXT: s_add_pc_i64 .LBB10_2-.Lpost_addpc17
572+
; GCN-NEXT: .Lpost_addpc17:
570573
; GCN-NEXT: .LBB10_5: ; %bb9
571574
; GCN-NEXT: s_cmp_lt_i32 s3, 11
572575
; GCN-NEXT: s_cselect_b32 s0, -1, 0
@@ -577,8 +580,8 @@ define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32
577580
; GCN-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0
578581
; GCN-NEXT: s_cbranch_vccnz .LBB10_6
579582
; GCN-NEXT: ; %bb.18: ; %bb9
580-
; GCN-NEXT: s_add_pc_i64 .LBB10_3-.Lpost_addpc17
581-
; GCN-NEXT: .Lpost_addpc17:
583+
; GCN-NEXT: s_add_pc_i64 .LBB10_3-.Lpost_addpc18
584+
; GCN-NEXT: .Lpost_addpc18:
582585
; GCN-NEXT: .LBB10_6:
583586
; GCN-NEXT: ; implicit-def: $vgpr0
584587
; GCN-NEXT: .LBB10_7: ; %bb19

0 commit comments

Comments
 (0)