Skip to content

Commit 9f50224

Browse files
authored
[DAG] Remove Depth=1 hack from isGuaranteedNotToBeUndefOrPoison checks (#152127)
Now that #146490 removed the assertion in visitFREEZE that the node was still isGuaranteedNotToBeUndefOrPoison, we no longer need this reduced depth hack (which had to account for the difference in depth of freeze(op()) vs op(freeze())). Helps with some of the minor regressions in #150017.
1 parent ab6923b commit 9f50224

File tree

8 files changed

+36
-46
lines changed

8 files changed

+36
-46
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16813,8 +16813,7 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1681316813
SmallSet<SDValue, 8> MaybePoisonOperands;
1681416814
SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
1681516815
for (auto [OpNo, Op] : enumerate(N0->ops())) {
16816-
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
16817-
/*Depth*/ 1))
16816+
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
1681816817
continue;
1681916818
bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
1682016819
bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6351,8 +6351,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
63516351
break;
63526352
case ISD::FREEZE:
63536353
assert(VT == N1.getValueType() && "Unexpected VT!");
6354-
if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly*/ false,
6355-
/*Depth*/ 1))
6354+
if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly=*/false))
63566355
return N1;
63576356
break;
63586357
case ISD::TokenFactor:

llvm/test/CodeGen/AMDGPU/div_i128.ll

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2680,9 +2680,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26802680
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
26812681
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1
26822682
; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
2683-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], -1
2684-
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[8:9]
2685-
; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
2683+
; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1
2684+
; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
26862685
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
26872686
; GFX9-O0-NEXT: s_mov_b32 s14, s13
26882687
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
@@ -2699,19 +2698,16 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26992698
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
27002699
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
27012700
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
2702-
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
27032701
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
2704-
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[12:13]
2705-
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
2702+
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9]
27062703
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
2707-
; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[12:13]
2704+
; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9]
27082705
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
27092706
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
27102707
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
27112708
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
2712-
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
27132709
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
2714-
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[12:13]
2710+
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9]
27152711
; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
27162712
; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
27172713
; GFX9-O0-NEXT: ; implicit-def: $sgpr8

llvm/test/CodeGen/AMDGPU/rem_i128.ll

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1913,9 +1913,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
19131913
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
19141914
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1
19151915
; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
1916-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], -1
1917-
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[8:9]
1918-
; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
1916+
; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1
1917+
; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
19191918
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
19201919
; GFX9-O0-NEXT: s_mov_b32 s14, s13
19211920
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
@@ -1932,19 +1931,16 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
19321931
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
19331932
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
19341933
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
1935-
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
19361934
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
1937-
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[12:13]
1938-
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
1935+
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9]
19391936
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
1940-
; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[12:13]
1937+
; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9]
19411938
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
19421939
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
19431940
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19441941
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
1945-
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
19461942
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
1947-
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[12:13]
1943+
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9]
19481944
; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
19491945
; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
19501946
; GFX9-O0-NEXT: ; implicit-def: $sgpr8

llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3963,8 +3963,8 @@ define i64 @test_vector_reduce_smax_v16i64(<16 x i64> %v) {
39633963
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
39643964
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
39653965
; GFX7-SDAG-NEXT: v_cmp_gt_i64_e32 vcc, v[2:3], v[4:5]
3966-
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
39673966
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
3967+
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
39683968
; GFX7-SDAG-NEXT: v_cmp_gt_i64_e32 vcc, v[0:1], v[2:3]
39693969
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
39703970
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4067,8 +4067,8 @@ define i64 @test_vector_reduce_smax_v16i64(<16 x i64> %v) {
40674067
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
40684068
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
40694069
; GFX8-SDAG-NEXT: v_cmp_gt_i64_e32 vcc, v[2:3], v[4:5]
4070-
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
40714070
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
4071+
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
40724072
; GFX8-SDAG-NEXT: v_cmp_gt_i64_e32 vcc, v[0:1], v[2:3]
40734073
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
40744074
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4175,8 +4175,8 @@ define i64 @test_vector_reduce_smax_v16i64(<16 x i64> %v) {
41754175
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
41764176
; GFX9-SDAG-NEXT: v_cmp_gt_i64_e32 vcc, v[2:3], v[4:5]
41774177
; GFX9-SDAG-NEXT: s_nop 1
4178-
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
41794178
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
4179+
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
41804180
; GFX9-SDAG-NEXT: v_cmp_gt_i64_e32 vcc, v[0:1], v[2:3]
41814181
; GFX9-SDAG-NEXT: s_nop 1
41824182
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@@ -4283,8 +4283,8 @@ define i64 @test_vector_reduce_smax_v16i64(<16 x i64> %v) {
42834283
; GFX10-SDAG-NEXT: v_cmp_gt_i64_e64 s4, v[2:3], v[6:7]
42844284
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
42854285
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
4286-
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
42874286
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s4
4287+
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
42884288
; GFX10-SDAG-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
42894289
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
42904290
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4387,8 +4387,8 @@ define i64 @test_vector_reduce_smax_v16i64(<16 x i64> %v) {
43874387
; GFX11-SDAG-NEXT: v_cmp_gt_i64_e64 s0, v[2:3], v[6:7]
43884388
; GFX11-SDAG-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v4, v0
43894389
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4390-
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
43914390
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
4391+
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
43924392
; GFX11-SDAG-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
43934393
; GFX11-SDAG-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1
43944394
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4502,8 +4502,8 @@ define i64 @test_vector_reduce_smax_v16i64(<16 x i64> %v) {
45024502
; GFX12-SDAG-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v4, v0
45034503
; GFX12-SDAG-NEXT: s_wait_alu 0xf1ff
45044504
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4505-
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
45064505
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
4506+
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
45074507
; GFX12-SDAG-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
45084508
; GFX12-SDAG-NEXT: s_wait_alu 0xfffd
45094509
; GFX12-SDAG-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1

llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3963,8 +3963,8 @@ define i64 @test_vector_reduce_smin_v16i64(<16 x i64> %v) {
39633963
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
39643964
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
39653965
; GFX7-SDAG-NEXT: v_cmp_lt_i64_e32 vcc, v[2:3], v[4:5]
3966-
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
39673966
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
3967+
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
39683968
; GFX7-SDAG-NEXT: v_cmp_lt_i64_e32 vcc, v[0:1], v[2:3]
39693969
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
39703970
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4067,8 +4067,8 @@ define i64 @test_vector_reduce_smin_v16i64(<16 x i64> %v) {
40674067
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
40684068
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
40694069
; GFX8-SDAG-NEXT: v_cmp_lt_i64_e32 vcc, v[2:3], v[4:5]
4070-
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
40714070
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
4071+
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
40724072
; GFX8-SDAG-NEXT: v_cmp_lt_i64_e32 vcc, v[0:1], v[2:3]
40734073
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
40744074
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4175,8 +4175,8 @@ define i64 @test_vector_reduce_smin_v16i64(<16 x i64> %v) {
41754175
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
41764176
; GFX9-SDAG-NEXT: v_cmp_lt_i64_e32 vcc, v[2:3], v[4:5]
41774177
; GFX9-SDAG-NEXT: s_nop 1
4178-
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
41794178
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
4179+
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
41804180
; GFX9-SDAG-NEXT: v_cmp_lt_i64_e32 vcc, v[0:1], v[2:3]
41814181
; GFX9-SDAG-NEXT: s_nop 1
41824182
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@@ -4283,8 +4283,8 @@ define i64 @test_vector_reduce_smin_v16i64(<16 x i64> %v) {
42834283
; GFX10-SDAG-NEXT: v_cmp_lt_i64_e64 s4, v[2:3], v[6:7]
42844284
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
42854285
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
4286-
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
42874286
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s4
4287+
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
42884288
; GFX10-SDAG-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[2:3]
42894289
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
42904290
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4387,8 +4387,8 @@ define i64 @test_vector_reduce_smin_v16i64(<16 x i64> %v) {
43874387
; GFX11-SDAG-NEXT: v_cmp_lt_i64_e64 s0, v[2:3], v[6:7]
43884388
; GFX11-SDAG-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v4, v0
43894389
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4390-
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
43914390
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
4391+
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
43924392
; GFX11-SDAG-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[2:3]
43934393
; GFX11-SDAG-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1
43944394
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4502,8 +4502,8 @@ define i64 @test_vector_reduce_smin_v16i64(<16 x i64> %v) {
45024502
; GFX12-SDAG-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v4, v0
45034503
; GFX12-SDAG-NEXT: s_wait_alu 0xf1ff
45044504
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4505-
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
45064505
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
4506+
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
45074507
; GFX12-SDAG-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[2:3]
45084508
; GFX12-SDAG-NEXT: s_wait_alu 0xfffd
45094509
; GFX12-SDAG-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1

llvm/test/CodeGen/AMDGPU/vector-reduce-umax.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3839,8 +3839,8 @@ define i64 @test_vector_reduce_umax_v16i64(<16 x i64> %v) {
38393839
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
38403840
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
38413841
; GFX7-SDAG-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[4:5]
3842-
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
38433842
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
3843+
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
38443844
; GFX7-SDAG-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
38453845
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
38463846
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -3943,8 +3943,8 @@ define i64 @test_vector_reduce_umax_v16i64(<16 x i64> %v) {
39433943
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
39443944
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
39453945
; GFX8-SDAG-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[4:5]
3946-
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
39473946
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
3947+
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
39483948
; GFX8-SDAG-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
39493949
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
39503950
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4051,8 +4051,8 @@ define i64 @test_vector_reduce_umax_v16i64(<16 x i64> %v) {
40514051
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
40524052
; GFX9-SDAG-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[4:5]
40534053
; GFX9-SDAG-NEXT: s_nop 1
4054-
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
40554054
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
4055+
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
40564056
; GFX9-SDAG-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
40574057
; GFX9-SDAG-NEXT: s_nop 1
40584058
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@@ -4159,8 +4159,8 @@ define i64 @test_vector_reduce_umax_v16i64(<16 x i64> %v) {
41594159
; GFX10-SDAG-NEXT: v_cmp_gt_u64_e64 s4, v[2:3], v[6:7]
41604160
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
41614161
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
4162-
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
41634162
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s4
4163+
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
41644164
; GFX10-SDAG-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3]
41654165
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
41664166
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4263,8 +4263,8 @@ define i64 @test_vector_reduce_umax_v16i64(<16 x i64> %v) {
42634263
; GFX11-SDAG-NEXT: v_cmp_gt_u64_e64 s0, v[2:3], v[6:7]
42644264
; GFX11-SDAG-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v4, v0
42654265
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4266-
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
42674266
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
4267+
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
42684268
; GFX11-SDAG-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3]
42694269
; GFX11-SDAG-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1
42704270
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4378,8 +4378,8 @@ define i64 @test_vector_reduce_umax_v16i64(<16 x i64> %v) {
43784378
; GFX12-SDAG-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v4, v0
43794379
; GFX12-SDAG-NEXT: s_wait_alu 0xf1ff
43804380
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4381-
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
43824381
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
4382+
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
43834383
; GFX12-SDAG-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3]
43844384
; GFX12-SDAG-NEXT: s_wait_alu 0xfffd
43854385
; GFX12-SDAG-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1

llvm/test/CodeGen/AMDGPU/vector-reduce-umin.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3585,8 +3585,8 @@ define i64 @test_vector_reduce_umin_v16i64(<16 x i64> %v) {
35853585
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
35863586
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
35873587
; GFX7-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[4:5]
3588-
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
35893588
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
3589+
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
35903590
; GFX7-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
35913591
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
35923592
; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -3689,8 +3689,8 @@ define i64 @test_vector_reduce_umin_v16i64(<16 x i64> %v) {
36893689
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
36903690
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
36913691
; GFX8-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[4:5]
3692-
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
36933692
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
3693+
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
36943694
; GFX8-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
36953695
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
36963696
; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -3797,8 +3797,8 @@ define i64 @test_vector_reduce_umin_v16i64(<16 x i64> %v) {
37973797
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
37983798
; GFX9-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[4:5]
37993799
; GFX9-SDAG-NEXT: s_nop 1
3800-
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
38013800
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
3801+
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
38023802
; GFX9-SDAG-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
38033803
; GFX9-SDAG-NEXT: s_nop 1
38043804
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
@@ -3905,8 +3905,8 @@ define i64 @test_vector_reduce_umin_v16i64(<16 x i64> %v) {
39053905
; GFX10-SDAG-NEXT: v_cmp_lt_u64_e64 s4, v[2:3], v[6:7]
39063906
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
39073907
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
3908-
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
39093908
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s4
3909+
; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s4
39103910
; GFX10-SDAG-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
39113911
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
39123912
; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
@@ -4009,8 +4009,8 @@ define i64 @test_vector_reduce_umin_v16i64(<16 x i64> %v) {
40094009
; GFX11-SDAG-NEXT: v_cmp_lt_u64_e64 s0, v[2:3], v[6:7]
40104010
; GFX11-SDAG-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v4, v0
40114011
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4012-
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
40134012
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
4013+
; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
40144014
; GFX11-SDAG-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
40154015
; GFX11-SDAG-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1
40164016
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4124,8 +4124,8 @@ define i64 @test_vector_reduce_umin_v16i64(<16 x i64> %v) {
41244124
; GFX12-SDAG-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v4, v0
41254125
; GFX12-SDAG-NEXT: s_wait_alu 0xf1ff
41264126
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4127-
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
41284127
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s0
4128+
; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s0
41294129
; GFX12-SDAG-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
41304130
; GFX12-SDAG-NEXT: s_wait_alu 0xfffd
41314131
; GFX12-SDAG-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1

0 commit comments

Comments
 (0)