Skip to content

Commit 7d23323

Browse files
authored
[AMDGPU] Fix destination op_sel for v_cvt_scale32_* and v_cvt_sr_* (#151411)
GFX950 uses OP_SEL[MSB:LSB] for both src reads and dest writes, so this patch essentially reverts the work from #151286 regarding dest writes.
1 parent ed940d7 commit 7d23323

File tree

5 files changed

+37
-35
lines changed

5 files changed

+37
-35
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7038,13 +7038,13 @@ void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
70387038
MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
70397039
assert(OpIdx >= 0 && "expected to match an immediate operand");
70407040
MIB.addImm(
7041-
(MI.getOperand(OpIdx).getImm() & 0x2) ? (int64_t)SISrcMods::OP_SEL_0 : 0);
7041+
(MI.getOperand(OpIdx).getImm() & 0x1) ? (int64_t)SISrcMods::OP_SEL_0 : 0);
70427042
}
70437043

70447044
void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
70457045
MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
70467046
assert(OpIdx >= 0 && "expected to match an immediate operand");
7047-
MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x1)
7047+
MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x2)
70487048
? (int64_t)SISrcMods::DST_OP_SEL
70497049
: 0);
70507050
}

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,8 +1015,10 @@ class SrcAndDstSelToOpSelXForm<int modifier_idx, bit dest_sel> : SDNodeXForm<tim
10151015
if (}] # modifier_idx # [{ == 0) {
10161016
New = (}] # dest_sel # [{ == 1) ? ((Val & 0x1) ? (SISrcMods::OP_SEL_0 | SISrcMods::DST_OP_SEL) : SISrcMods::DST_OP_SEL)
10171017
: ((Val & 0x1) ? SISrcMods::OP_SEL_0 : SISrcMods::NONE);
1018-
} else if (}] # modifier_idx # [{== 1 || }] # modifier_idx # [{ == 2) {
1019-
New = (Val & 0x2) ? SISrcMods::OP_SEL_0 : SISrcMods::NONE;
1018+
} else if (}] # modifier_idx # [{== 1) {
1019+
New = (Val & 0x2) ? SISrcMods::OP_SEL_0 : SISrcMods::NONE;
1020+
} if (}] # modifier_idx # [{== 2) {
1021+
New = (Val & 0x1) ? SISrcMods::OP_SEL_0 : SISrcMods::NONE;
10201022
}
10211023
return CurDAG->getTargetConstant(New, SDLoc(N), MVT::i32);
10221024
}]>;
@@ -1060,7 +1062,7 @@ def gi_SrcSelToOpSelXForm : GICustomOperandRenderer<"renderSrcSelToOpSelXForm">,
10601062
def DstSelToOpSel3XForm : SDNodeXForm<timm, [{
10611063
uint32_t V = N->getZExtValue();
10621064
return CurDAG->getTargetConstant(
1063-
(V & 0x1) ? SISrcMods::DST_OP_SEL : SISrcMods::NONE,
1065+
(V & 0x2) ? SISrcMods::DST_OP_SEL : SISrcMods::NONE,
10641066
SDLoc(N), MVT::i32);
10651067
}]>;
10661068
def gi_DstSelToOpSel3XForm : GICustomOperandRenderer<"renderDstSelToOpSel3XFormXForm">,

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk.gfx950.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -813,7 +813,7 @@ define i32 @test_cvt_scale_fp4_f32_byte1(i32 %old, float %src0, float %src1, flo
813813
; GCN-LABEL: test_cvt_scale_fp4_f32_byte1:
814814
; GCN: ; %bb.0:
815815
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
816-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f32 v0, v1, v2, v3 op_sel:[0,0,0,1]
816+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f32 v0, v1, v2, v3 op_sel:[0,0,1,0]
817817
; GCN-NEXT: s_setpc_b64 s[30:31]
818818
%ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 1)
819819
ret i32 %ret
@@ -823,7 +823,7 @@ define i32 @test_cvt_scale_fp4_f32_byte2(i32 %old, float %src0, float %src1, flo
823823
; GCN-LABEL: test_cvt_scale_fp4_f32_byte2:
824824
; GCN: ; %bb.0:
825825
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
826-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f32 v0, v1, v2, v3 op_sel:[0,0,1,0]
826+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f32 v0, v1, v2, v3 op_sel:[0,0,0,1]
827827
; GCN-NEXT: s_setpc_b64 s[30:31]
828828
%ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 2)
829829
ret i32 %ret
@@ -1302,7 +1302,7 @@ define i32 @test_cvt_scalef32_fp4_f16_byte1(<2 x half> %src0, float %scale, i32
13021302
; GCN-LABEL: test_cvt_scalef32_fp4_f16_byte1:
13031303
; GCN: ; %bb.0:
13041304
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1305-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f16 v2, v0, v1 op_sel:[0,0,0,1]
1305+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f16 v2, v0, v1 op_sel:[0,0,1,0]
13061306
; GCN-NEXT: s_nop 0
13071307
; GCN-NEXT: v_mov_b32_e32 v0, v2
13081308
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -1314,7 +1314,7 @@ define i32 @test_cvt_scalef32_fp4_f16_byte2(<2 x half> %src0, float %scale, i32
13141314
; GCN-LABEL: test_cvt_scalef32_fp4_f16_byte2:
13151315
; GCN: ; %bb.0:
13161316
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1317-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f16 v2, v0, v1 op_sel:[0,0,1,0]
1317+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f16 v2, v0, v1 op_sel:[0,0,0,1]
13181318
; GCN-NEXT: s_nop 0
13191319
; GCN-NEXT: v_mov_b32_e32 v0, v2
13201320
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -1380,7 +1380,7 @@ define i32 @test_cvt_scalef32_fp4_bf16_byte1(<2 x bfloat> %src0, float %scale, i
13801380
; GCN-LABEL: test_cvt_scalef32_fp4_bf16_byte1:
13811381
; GCN: ; %bb.0:
13821382
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1383-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v2, v0, v1 op_sel:[0,0,0,1]
1383+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v2, v0, v1 op_sel:[0,0,1,0]
13841384
; GCN-NEXT: s_nop 0
13851385
; GCN-NEXT: v_mov_b32_e32 v0, v2
13861386
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -1392,7 +1392,7 @@ define i32 @test_cvt_scalef32_fp4_bf16_byte2(<2 x bfloat> %src0, float %scale, i
13921392
; GCN-LABEL: test_cvt_scalef32_fp4_bf16_byte2:
13931393
; GCN: ; %bb.0:
13941394
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1395-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v2, v0, v1 op_sel:[0,0,1,0]
1395+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v2, v0, v1 op_sel:[0,0,0,1]
13961396
; GCN-NEXT: s_nop 0
13971397
; GCN-NEXT: v_mov_b32_e32 v0, v2
13981398
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -2072,7 +2072,7 @@ define i32 @test_cvt_scale_fp4_f32_byte1_inreg_src(i32 %old, float inreg %src0,
20722072
; GCN-LABEL: test_cvt_scale_fp4_f32_byte1_inreg_src:
20732073
; GCN: ; %bb.0:
20742074
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2075-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f32 v0, s0, v1, v2 op_sel:[0,0,0,1]
2075+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f32 v0, s0, v1, v2 op_sel:[0,0,1,0]
20762076
; GCN-NEXT: s_setpc_b64 s[30:31]
20772077
%ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 1)
20782078
ret i32 %ret
@@ -2082,7 +2082,7 @@ define i32 @test_cvt_scale_fp4_f32_byte2_inreg_src(i32 %old, float inreg %src0,
20822082
; GCN-LABEL: test_cvt_scale_fp4_f32_byte2_inreg_src:
20832083
; GCN: ; %bb.0:
20842084
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2085-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f32 v0, s0, v1, v2 op_sel:[0,0,1,0]
2085+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f32 v0, s0, v1, v2 op_sel:[0,0,0,1]
20862086
; GCN-NEXT: s_setpc_b64 s[30:31]
20872087
%ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 2)
20882088
ret i32 %ret
@@ -2515,7 +2515,7 @@ define i32 @test_cvt_scalef32_fp4_f16_byte1_inreg_src(<2 x half> inreg %src0, fl
25152515
; GCN-LABEL: test_cvt_scalef32_fp4_f16_byte1_inreg_src:
25162516
; GCN: ; %bb.0:
25172517
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2518-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f16 v1, s0, v0 op_sel:[0,0,0,1]
2518+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f16 v1, s0, v0 op_sel:[0,0,1,0]
25192519
; GCN-NEXT: s_nop 0
25202520
; GCN-NEXT: v_mov_b32_e32 v0, v1
25212521
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -2527,7 +2527,7 @@ define i32 @test_cvt_scalef32_fp4_f16_byte2_inreg_src(<2 x half> inreg %src0, fl
25272527
; GCN-LABEL: test_cvt_scalef32_fp4_f16_byte2_inreg_src:
25282528
; GCN: ; %bb.0:
25292529
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2530-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f16 v1, s0, v0 op_sel:[0,0,1,0]
2530+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_f16 v1, s0, v0 op_sel:[0,0,0,1]
25312531
; GCN-NEXT: s_nop 0
25322532
; GCN-NEXT: v_mov_b32_e32 v0, v1
25332533
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -2562,7 +2562,7 @@ define i32 @test_cvt_scalef32_fp4_bf16_byte1_inreg_src(<2 x bfloat> inreg %src0,
25622562
; GCN-LABEL: test_cvt_scalef32_fp4_bf16_byte1_inreg_src:
25632563
; GCN: ; %bb.0:
25642564
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2565-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v1, s0, v0 op_sel:[0,0,0,1]
2565+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v1, s0, v0 op_sel:[0,0,1,0]
25662566
; GCN-NEXT: s_nop 0
25672567
; GCN-NEXT: v_mov_b32_e32 v0, v1
25682568
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -2574,7 +2574,7 @@ define i32 @test_cvt_scalef32_fp4_bf16_byte2_inreg_src(<2 x bfloat> inreg %src0,
25742574
; GCN-LABEL: test_cvt_scalef32_fp4_bf16_byte2_inreg_src:
25752575
; GCN: ; %bb.0:
25762576
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2577-
; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v1, s0, v0 op_sel:[0,0,1,0]
2577+
; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v1, s0, v0 op_sel:[0,0,0,1]
25782578
; GCN-NEXT: s_nop 0
25792579
; GCN-NEXT: v_mov_b32_e32 v0, v1
25802580
; GCN-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_1(ptr addrspace(1)
2828
; GFX950: ; %bb.0:
2929
; GFX950-NEXT: global_load_dword v5, v[0:1], off
3030
; GFX950-NEXT: s_waitcnt vmcnt(0)
31-
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,0,1]
31+
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,0]
3232
; GFX950-NEXT: global_store_dword v[0:1], v5, off
3333
; GFX950-NEXT: s_endpgm
3434
%old = load i32, ptr addrspace(1) %out, align 4
@@ -42,7 +42,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_2(ptr addrspace(1)
4242
; GFX950: ; %bb.0:
4343
; GFX950-NEXT: global_load_dword v5, v[0:1], off
4444
; GFX950-NEXT: s_waitcnt vmcnt(0)
45-
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,0]
45+
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,0,1]
4646
; GFX950-NEXT: global_store_dword v[0:1], v5, off
4747
; GFX950-NEXT: s_endpgm
4848
%old = load i32, ptr addrspace(1) %out, align 4
@@ -84,7 +84,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_1(ptr addrspace(1) %
8484
; GFX950: ; %bb.0:
8585
; GFX950-NEXT: global_load_dword v5, v[0:1], off
8686
; GFX950-NEXT: s_waitcnt vmcnt(0)
87-
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,0,1]
87+
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,1,0]
8888
; GFX950-NEXT: global_store_dword v[0:1], v5, off
8989
; GFX950-NEXT: s_endpgm
9090
%old = load i32, ptr addrspace(1) %out, align 4
@@ -98,7 +98,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_2(ptr addrspace(1) %
9898
; GFX950: ; %bb.0:
9999
; GFX950-NEXT: global_load_dword v5, v[0:1], off
100100
; GFX950-NEXT: s_waitcnt vmcnt(0)
101-
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,1,0]
101+
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,0,1]
102102
; GFX950-NEXT: global_store_dword v[0:1], v5, off
103103
; GFX950-NEXT: s_endpgm
104104
%old = load i32, ptr addrspace(1) %out, align 4
@@ -140,7 +140,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_1(ptr addrspace(1) %
140140
; GFX950: ; %bb.0:
141141
; GFX950-NEXT: global_load_dword v5, v[0:1], off
142142
; GFX950-NEXT: s_waitcnt vmcnt(0)
143-
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,0,1]
143+
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,1,0]
144144
; GFX950-NEXT: global_store_dword v[0:1], v5, off
145145
; GFX950-NEXT: s_endpgm
146146
%old = load i32, ptr addrspace(1) %out, align 4
@@ -154,7 +154,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_2(ptr addrspace(1) %
154154
; GFX950: ; %bb.0:
155155
; GFX950-NEXT: global_load_dword v5, v[0:1], off
156156
; GFX950-NEXT: s_waitcnt vmcnt(0)
157-
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,1,0]
157+
; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,0,1]
158158
; GFX950-NEXT: global_store_dword v[0:1], v5, off
159159
; GFX950-NEXT: s_endpgm
160160
%old = load i32, ptr addrspace(1) %out, align 4
@@ -196,7 +196,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_1(ptr addrspace(1)
196196
; GFX950: ; %bb.0:
197197
; GFX950-NEXT: global_load_dword v5, v[0:1], off
198198
; GFX950-NEXT: s_waitcnt vmcnt(0)
199-
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,0,1]
199+
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,0]
200200
; GFX950-NEXT: global_store_dword v[0:1], v5, off
201201
; GFX950-NEXT: s_endpgm
202202
%old = load i32, ptr addrspace(1) %out, align 4
@@ -210,7 +210,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_2(ptr addrspace(1)
210210
; GFX950: ; %bb.0:
211211
; GFX950-NEXT: global_load_dword v5, v[0:1], off
212212
; GFX950-NEXT: s_waitcnt vmcnt(0)
213-
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,0]
213+
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,0,1]
214214
; GFX950-NEXT: global_store_dword v[0:1], v5, off
215215
; GFX950-NEXT: s_endpgm
216216
%old = load i32, ptr addrspace(1) %out, align 4
@@ -252,7 +252,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_1(ptr addrspace(1) %
252252
; GFX950: ; %bb.0:
253253
; GFX950-NEXT: global_load_dword v5, v[0:1], off
254254
; GFX950-NEXT: s_waitcnt vmcnt(0)
255-
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,0,1]
255+
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,1,0]
256256
; GFX950-NEXT: global_store_dword v[0:1], v5, off
257257
; GFX950-NEXT: s_endpgm
258258
%old = load i32, ptr addrspace(1) %out, align 4
@@ -266,7 +266,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_2(ptr addrspace(1) %
266266
; GFX950: ; %bb.0:
267267
; GFX950-NEXT: global_load_dword v5, v[0:1], off
268268
; GFX950-NEXT: s_waitcnt vmcnt(0)
269-
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,1,0]
269+
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,0,1]
270270
; GFX950-NEXT: global_store_dword v[0:1], v5, off
271271
; GFX950-NEXT: s_endpgm
272272
%old = load i32, ptr addrspace(1) %out, align 4
@@ -308,7 +308,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_1(ptr addrspace(1) %
308308
; GFX950: ; %bb.0:
309309
; GFX950-NEXT: global_load_dword v5, v[0:1], off
310310
; GFX950-NEXT: s_waitcnt vmcnt(0)
311-
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,0,1]
311+
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,1,0]
312312
; GFX950-NEXT: global_store_dword v[0:1], v5, off
313313
; GFX950-NEXT: s_endpgm
314314
%old = load i32, ptr addrspace(1) %out, align 4
@@ -322,7 +322,7 @@ define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_2(ptr addrspace(1) %
322322
; GFX950: ; %bb.0:
323323
; GFX950-NEXT: global_load_dword v5, v[0:1], off
324324
; GFX950-NEXT: s_waitcnt vmcnt(0)
325-
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,1,0]
325+
; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,0,1]
326326
; GFX950-NEXT: global_store_dword v[0:1], v5, off
327327
; GFX950-NEXT: s_endpgm
328328
%old = load i32, ptr addrspace(1) %out, align 4

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.gfx950.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ define amdgpu_ps void @test_scalef32_sr_pk_fp4_f16_dst_sel_1(ptr addrspace(1) %o
2525
; GFX950: ; %bb.0:
2626
; GFX950-NEXT: global_load_dword v5, v[0:1], off
2727
; GFX950-NEXT: s_waitcnt vmcnt(0)
28-
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_f16 v6, v2, v3, v4 op_sel:[0,0,0,1]
28+
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_f16 v6, v2, v3, v4 op_sel:[0,0,1,0]
2929
; GFX950-NEXT: global_store_dword v[0:1], v6, off
3030
; GFX950-NEXT: s_endpgm
3131
%old = load i32, ptr addrspace(1) %out, align 4
@@ -39,7 +39,7 @@ define amdgpu_ps void @test_scalef32_sr_pk_fp4_f16_dst_sel_2(ptr addrspace(1) %o
3939
; GFX950: ; %bb.0:
4040
; GFX950-NEXT: global_load_dword v5, v[0:1], off
4141
; GFX950-NEXT: s_waitcnt vmcnt(0)
42-
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_f16 v6, v2, v3, v4 op_sel:[0,0,1,0]
42+
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_f16 v6, v2, v3, v4 op_sel:[0,0,0,1]
4343
; GFX950-NEXT: global_store_dword v[0:1], v6, off
4444
; GFX950-NEXT: s_endpgm
4545
%old = load i32, ptr addrspace(1) %out, align 4
@@ -81,7 +81,7 @@ define amdgpu_ps void @test_scalef32_sr_pk_fp4_bf16_dst_sel_1(ptr addrspace(1) %
8181
; GFX950: ; %bb.0:
8282
; GFX950-NEXT: global_load_dword v5, v[0:1], off
8383
; GFX950-NEXT: s_waitcnt vmcnt(0)
84-
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_bf16 v6, v2, v3, v4 op_sel:[0,0,0,1]
84+
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_bf16 v6, v2, v3, v4 op_sel:[0,0,1,0]
8585
; GFX950-NEXT: global_store_dword v[0:1], v6, off
8686
; GFX950-NEXT: s_endpgm
8787
%old = load i32, ptr addrspace(1) %out, align 4
@@ -95,7 +95,7 @@ define amdgpu_ps void @test_scalef32_sr_pk_fp4_bf16_dst_sel_2(ptr addrspace(1) %
9595
; GFX950: ; %bb.0:
9696
; GFX950-NEXT: global_load_dword v5, v[0:1], off
9797
; GFX950-NEXT: s_waitcnt vmcnt(0)
98-
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_bf16 v6, v2, v3, v4 op_sel:[0,0,1,0]
98+
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_bf16 v6, v2, v3, v4 op_sel:[0,0,0,1]
9999
; GFX950-NEXT: global_store_dword v[0:1], v6, off
100100
; GFX950-NEXT: s_endpgm
101101
%old = load i32, ptr addrspace(1) %out, align 4
@@ -137,7 +137,7 @@ define amdgpu_ps void @test_scalef32_sr_pk_fp4_f32_dst_sel_1(ptr addrspace(1) %o
137137
; GFX950: ; %bb.0:
138138
; GFX950-NEXT: global_load_dword v6, v[0:1], off
139139
; GFX950-NEXT: s_waitcnt vmcnt(0)
140-
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_f32 v7, v[2:3], v4, v5 op_sel:[0,0,0,1]
140+
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_f32 v7, v[2:3], v4, v5 op_sel:[0,0,1,0]
141141
; GFX950-NEXT: global_store_dword v[0:1], v7, off
142142
; GFX950-NEXT: s_endpgm
143143
%old = load i32, ptr addrspace(1) %out, align 4
@@ -151,7 +151,7 @@ define amdgpu_ps void @test_scalef32_sr_pk_fp4_f32_dst_sel_2(ptr addrspace(1) %o
151151
; GFX950: ; %bb.0:
152152
; GFX950-NEXT: global_load_dword v6, v[0:1], off
153153
; GFX950-NEXT: s_waitcnt vmcnt(0)
154-
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_f32 v7, v[2:3], v4, v5 op_sel:[0,0,1,0]
154+
; GFX950-NEXT: v_cvt_scalef32_sr_pk_fp4_f32 v7, v[2:3], v4, v5 op_sel:[0,0,0,1]
155155
; GFX950-NEXT: global_store_dword v[0:1], v7, off
156156
; GFX950-NEXT: s_endpgm
157157
%old = load i32, ptr addrspace(1) %out, align 4

0 commit comments

Comments
 (0)