
Commit 817f8e9

[LICM] Do not reassociate constant offset GEP
LICM tries to reassociate GEPs in order to hoist an invariant GEP. Currently, it also does this when the GEP has a constant offset. This is usually undesirable. From a back-end perspective, constant GEPs are usually free because they can be folded into addressing modes, so this just increases register pressure. From a middle-end perspective, keeping constant offsets last in the chain makes it easier to analyze the relationship between multiple GEPs on the same base.

The worst that can happen here is that we start with something like:

```
loop {
  p + 4*x
  p + 4*x + 1
  p + 4*x + 2
  p + 4*x + 3
}
```

and LICM converts it into:

```
p.1 = p + 1
p.2 = p + 2
p.3 = p + 3
loop {
  p + 4*x
  p.1 + 4*x
  p.2 + 4*x
  p.3 + 4*x
}
```

which is much worse than leaving it for CSE to convert to:

```
loop {
  p2 = p + 4*x
  p2 + 1
  p2 + 2
  p2 + 3
}
```
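As a concrete illustration of the pattern described above, here is a minimal hand-written IR sketch (the function and value names are hypothetical, not taken from the patch or its tests). The loop-variant GEP on the invariant base `%p` is followed by an all-constant-offset GEP; the guard added in this commit keeps LICM from reassociating such a pair.

```llvm
; Hand-written illustration (hypothetical names), reduced from the pattern above.
define void @constant_offset_gep(ptr %p, i64 %n) {
entry:
  br label %loop

loop:                                          ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Loop-variant GEP on the loop-invariant base %p ...
  %base.i = getelementptr i8, ptr %p, i64 %i
  ; ... followed by an all-constant-offset GEP (p + i + 4). Before this patch,
  ; LICM could reassociate the pair, hoisting a "%p plus 4" GEP into the
  ; preheader; with the patch it bails out and the constant offset stays last.
  %elt = getelementptr i8, ptr %base.i, i64 4
  store i8 0, ptr %elt
  %i.next = add i64 %i, 1
  %cond = icmp ult i64 %i.next, %n
  br i1 %cond, label %loop, label %exit

exit:                                          ; preds = %loop
  ret void
}
```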
1 parent: e2bd92e

File tree

9 files changed: +291 additions, −248 deletions

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 6 additions & 0 deletions
```diff
@@ -2517,6 +2517,12 @@ static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
   if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant))
     return false;
 
+  // Do not try to hoist a constant GEP out of the loop via reassociation.
+  // Constant GEPs can often be folded into addressing modes, and reassociating
+  // them may inhibit CSE of a common base.
+  if (GEP->hasAllConstantIndices())
+    return false;
+
   // This can only happen if !AllowSpeculation, otherwise this would already be
   // handled.
   // FIXME: Should we respect AllowSpeculation in these reassociation folds?
```
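To make the "CSE of a common base" remark in the new comment concrete, here is a small hand-written IR sketch (hypothetical function and value names, not taken from the patch or its tests). With the constant offsets kept last, both accesses share the single variant base GEP; reassociating the constants onto `%p` instead would leave two distinct invariant bases that each need the variant index re-added.

```llvm
; Two accesses at p+i+1 and p+i+2: keeping the +1/+2 at the end of each chain
; lets them share the %base computation (hypothetical example).
define i8 @shared_base(ptr %p, i64 %i) {
  %base = getelementptr i8, ptr %p, i64 %i
  %a.ptr = getelementptr i8, ptr %base, i64 1
  %b.ptr = getelementptr i8, ptr %base, i64 2
  %a = load i8, ptr %a.ptr
  %b = load i8, ptr %b.ptr
  %sum = add i8 %a, %b
  ret i8 %sum
}
```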

llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll

Lines changed: 8 additions & 8 deletions
```diff
@@ -400,9 +400,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r
 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
 ; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0
+; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6
 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
+; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
 ; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0
 ; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
 ; GFX12-NEXT: s_wait_alu 0xf1ff
@@ -438,9 +438,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r
 ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0
 ; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0
-; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0
+; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, v0, s6
 ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
+; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0
 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
 ; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff
@@ -531,9 +531,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d,
 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
 ; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0
+; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6
 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
+; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
 ; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0
 ; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
 ; GFX12-NEXT: s_wait_alu 0xf1ff
@@ -569,9 +569,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d,
 ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0
 ; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0
-; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0
+; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, v0, s6
 ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
+; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0
 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
 ; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff
```

llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll

Lines changed: 6 additions & 6 deletions
```diff
@@ -6982,7 +6982,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
 ; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16
 ; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0x800
 ; CHECK-NEXT: s_cbranch_scc1 .LBB6_2
-; CHECK-NEXT: .LBB6_3: ; %Flow9
+; CHECK-NEXT: .LBB6_3: ; %Flow7
 ; CHECK-NEXT: s_andn2_saveexec_b32 s8, s6
 ; CHECK-NEXT: s_cbranch_execz .LBB6_6
 ; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
@@ -7048,7 +7048,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
 ; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16
 ; CHECK-NEXT: s_cmp_eq_u64 s[4:5], s[6:7]
 ; CHECK-NEXT: s_cbranch_scc0 .LBB6_5
-; CHECK-NEXT: .LBB6_6: ; %Flow10
+; CHECK-NEXT: .LBB6_6: ; %Flow8
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
 ;
@@ -7689,7 +7689,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
 ; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3
 ; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1
 ; ALIGNED-NEXT: s_cbranch_scc1 .LBB6_2
-; ALIGNED-NEXT: .LBB6_3: ; %Flow9
+; ALIGNED-NEXT: .LBB6_3: ; %Flow7
 ; ALIGNED-NEXT: s_andn2_saveexec_b32 s8, s6
 ; ALIGNED-NEXT: s_cbranch_execz .LBB6_6
 ; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
@@ -8316,7 +8316,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
 ; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3
 ; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1
 ; ALIGNED-NEXT: s_cbranch_scc0 .LBB6_5
-; ALIGNED-NEXT: .LBB6_6: ; %Flow10
+; ALIGNED-NEXT: .LBB6_6: ; %Flow8
 ; ALIGNED-NEXT: s_or_b32 exec_lo, exec_lo, s8
 ; ALIGNED-NEXT: s_clause 0x7
 ; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32
@@ -8369,7 +8369,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
 ; UNROLL3-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:2032
 ; UNROLL3-NEXT: ; implicit-def: $vgpr2_vgpr3
 ; UNROLL3-NEXT: ; implicit-def: $vgpr0_vgpr1
-; UNROLL3-NEXT: .LBB6_4: ; %Flow7
+; UNROLL3-NEXT: .LBB6_4: ; %Flow5
 ; UNROLL3-NEXT: s_andn2_saveexec_b32 s8, s6
 ; UNROLL3-NEXT: s_cbranch_execz .LBB6_7
 ; UNROLL3-NEXT: ; %bb.5: ; %memmove_bwd_residual
@@ -8403,7 +8403,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
 ; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[12:15], off offset:32
 ; UNROLL3-NEXT: s_cmp_eq_u64 s[4:5], s[6:7]
 ; UNROLL3-NEXT: s_cbranch_scc0 .LBB6_6
-; UNROLL3-NEXT: .LBB6_7: ; %Flow8
+; UNROLL3-NEXT: .LBB6_7: ; %Flow6
 ; UNROLL3-NEXT: s_or_b32 exec_lo, exec_lo, s8
 ; UNROLL3-NEXT: s_setpc_b64 s[30:31]
 entry:
```

llvm/test/CodeGen/AMDGPU/memmove-var-size.ll

Lines changed: 18 additions & 18 deletions
```diff
@@ -460,10 +460,10 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
 ; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1]
 ; CHECK-NEXT: s_xor_b32 s7, exec_lo, s6
 ; CHECK-NEXT: s_cbranch_execnz .LBB3_3
-; CHECK-NEXT: ; %bb.1: ; %Flow34
+; CHECK-NEXT: ; %bb.1: ; %Flow36
 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
 ; CHECK-NEXT: s_cbranch_execnz .LBB3_10
-; CHECK-NEXT: .LBB3_2: ; %Flow35
+; CHECK-NEXT: .LBB3_2: ; %Flow37
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -494,7 +494,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
 ; CHECK-NEXT: v_add_co_ci_u32_e64 v11, null, 0, v11, s6
 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
 ; CHECK-NEXT: s_cbranch_execnz .LBB3_5
-; CHECK-NEXT: .LBB3_6: ; %Flow29
+; CHECK-NEXT: .LBB3_6: ; %Flow31
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
 ; CHECK-NEXT: s_and_saveexec_b32 s8, s4
 ; CHECK-NEXT: s_cbranch_execz .LBB3_9
@@ -520,7 +520,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
 ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s6
 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
 ; CHECK-NEXT: s_cbranch_execnz .LBB3_8
-; CHECK-NEXT: .LBB3_9: ; %Flow27
+; CHECK-NEXT: .LBB3_9: ; %Flow29
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
 ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
 ; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
@@ -556,7 +556,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
 ; CHECK-NEXT: v_add_co_ci_u32_e64 v5, null, -1, v5, s5
 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8
 ; CHECK-NEXT: s_cbranch_execnz .LBB3_12
-; CHECK-NEXT: .LBB3_13: ; %Flow33
+; CHECK-NEXT: .LBB3_13: ; %Flow35
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7
 ; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo
 ; CHECK-NEXT: s_cbranch_execz .LBB3_16
@@ -584,7 +584,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
 ; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[8:11]
 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s7
 ; CHECK-NEXT: s_cbranch_execnz .LBB3_15
-; CHECK-NEXT: .LBB3_16: ; %Flow31
+; CHECK-NEXT: .LBB3_16: ; %Flow33
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
@@ -907,10 +907,10 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
 ; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1]
 ; CHECK-NEXT: s_xor_b32 s7, exec_lo, s6
 ; CHECK-NEXT: s_cbranch_execnz .LBB6_3
-; CHECK-NEXT: ; %bb.1: ; %Flow41
+; CHECK-NEXT: ; %bb.1: ; %Flow39
 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
 ; CHECK-NEXT: s_cbranch_execnz .LBB6_10
-; CHECK-NEXT: .LBB6_2: ; %Flow42
+; CHECK-NEXT: .LBB6_2: ; %Flow40
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
 ; CHECK-NEXT: .LBB6_3: ; %memmove_copy_forward
@@ -940,7 +940,7 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
 ; CHECK-NEXT: v_add_co_ci_u32_e64 v11, null, 0, v11, s6
 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
 ; CHECK-NEXT: s_cbranch_execnz .LBB6_5
-; CHECK-NEXT: .LBB6_6: ; %Flow36
+; CHECK-NEXT: .LBB6_6: ; %Flow34
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
 ; CHECK-NEXT: s_and_saveexec_b32 s8, s4
 ; CHECK-NEXT: s_cbranch_execz .LBB6_9
@@ -966,11 +966,11 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
 ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s6
 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
 ; CHECK-NEXT: s_cbranch_execnz .LBB6_8
-; CHECK-NEXT: .LBB6_9: ; %Flow34
+; CHECK-NEXT: .LBB6_9: ; %Flow32
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
 ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
-; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
 ; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9
 ; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
@@ -1002,15 +1002,15 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
 ; CHECK-NEXT: v_add_co_ci_u32_e64 v5, null, -1, v5, s5
 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8
 ; CHECK-NEXT: s_cbranch_execnz .LBB6_12
-; CHECK-NEXT: .LBB6_13: ; %Flow40
+; CHECK-NEXT: .LBB6_13: ; %Flow38
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7
 ; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo
 ; CHECK-NEXT: s_cbranch_execz .LBB6_16
 ; CHECK-NEXT: ; %bb.14: ; %memmove_bwd_main_loop.preheader
-; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
-; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
 ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, -16
 ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
+; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
 ; CHECK-NEXT: s_mov_b32 s7, 0
 ; CHECK-NEXT: .p2align 6
 ; CHECK-NEXT: .LBB6_15: ; %memmove_bwd_main_loop
@@ -1030,7 +1030,7 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
 ; CHECK-NEXT: global_store_dwordx4 v[12:13], v[8:11], off
 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s7
 ; CHECK-NEXT: s_cbranch_execnz .LBB6_15
-; CHECK-NEXT: .LBB6_16: ; %Flow38
+; CHECK-NEXT: .LBB6_16: ; %Flow36
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -1181,8 +1181,8 @@ define void @memmove_p1_p4(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align
 ; CHECK-NEXT: .LBB8_9: ; %Flow31
 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
 ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
-; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
 ; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9
 ; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
@@ -1219,10 +1219,10 @@ define void @memmove_p1_p4(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align
 ; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo
 ; CHECK-NEXT: s_cbranch_execz .LBB8_16
 ; CHECK-NEXT: ; %bb.14: ; %memmove_bwd_main_loop.preheader
-; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
-; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
 ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, -16
 ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
+; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
 ; CHECK-NEXT: s_mov_b32 s7, 0
 ; CHECK-NEXT: .p2align 6
 ; CHECK-NEXT: .LBB8_15: ; %memmove_bwd_main_loop
```
