diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 68094c354cf46..c3f80f901a120 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2508,6 +2508,12 @@ static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, if (!GEP) return false; + // Do not try to hoist a constant GEP out of the loop via reassociation. + // Constant GEPs can often be folded into addressing modes, and reassociating + // them may inhibit CSE of a common base. + if (GEP->hasAllConstantIndices()) + return false; + auto *Src = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand()); if (!Src || !Src->hasOneUse() || !L.contains(Src)) return false; diff --git a/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll b/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll index 22ebb55826043..702a69f776de3 100644 --- a/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll +++ b/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll @@ -400,9 +400,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0 +; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1 +; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1 ; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0 ; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2 ; GFX12-NEXT: s_wait_alu 0xf1ff @@ -438,9 +438,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0 -; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0 +; GFX12-SPREFETCH-NEXT: 
v_add_co_u32 v2, s1, v0, s6 ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1 +; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2 ; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff @@ -531,9 +531,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d, ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0 +; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1 +; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1 ; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0 ; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2 ; GFX12-NEXT: s_wait_alu 0xf1ff @@ -569,9 +569,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d, ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0 -; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0 +; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, v0, s6 ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1 +; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2 ; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll index be020457ce87d..4c0ab91b7d622 100644 
--- a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll +++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll @@ -6982,7 +6982,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1 ; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16 ; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0x800 ; CHECK-NEXT: s_cbranch_scc1 .LBB6_2 -; CHECK-NEXT: .LBB6_3: ; %Flow9 +; CHECK-NEXT: .LBB6_3: ; %Flow7 ; CHECK-NEXT: s_andn2_saveexec_b32 s8, s6 ; CHECK-NEXT: s_cbranch_execz .LBB6_6 ; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader @@ -7048,7 +7048,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1 ; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16 ; CHECK-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] ; CHECK-NEXT: s_cbranch_scc0 .LBB6_5 -; CHECK-NEXT: .LBB6_6: ; %Flow10 +; CHECK-NEXT: .LBB6_6: ; %Flow8 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: s_setpc_b64 s[30:31] ; @@ -7689,7 +7689,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1 ; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3 ; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1 ; ALIGNED-NEXT: s_cbranch_scc1 .LBB6_2 -; ALIGNED-NEXT: .LBB6_3: ; %Flow9 +; ALIGNED-NEXT: .LBB6_3: ; %Flow7 ; ALIGNED-NEXT: s_andn2_saveexec_b32 s8, s6 ; ALIGNED-NEXT: s_cbranch_execz .LBB6_6 ; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader @@ -8316,7 +8316,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1 ; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3 ; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1 ; ALIGNED-NEXT: s_cbranch_scc0 .LBB6_5 -; ALIGNED-NEXT: .LBB6_6: ; %Flow10 +; ALIGNED-NEXT: .LBB6_6: ; %Flow8 ; ALIGNED-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; ALIGNED-NEXT: s_clause 0x7 ; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32 @@ -8369,7 +8369,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr 
addrspace(1 ; UNROLL3-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:2032 ; UNROLL3-NEXT: ; implicit-def: $vgpr2_vgpr3 ; UNROLL3-NEXT: ; implicit-def: $vgpr0_vgpr1 -; UNROLL3-NEXT: .LBB6_4: ; %Flow7 +; UNROLL3-NEXT: .LBB6_4: ; %Flow5 ; UNROLL3-NEXT: s_andn2_saveexec_b32 s8, s6 ; UNROLL3-NEXT: s_cbranch_execz .LBB6_7 ; UNROLL3-NEXT: ; %bb.5: ; %memmove_bwd_residual @@ -8403,7 +8403,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1 ; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[12:15], off offset:32 ; UNROLL3-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] ; UNROLL3-NEXT: s_cbranch_scc0 .LBB6_6 -; UNROLL3-NEXT: .LBB6_7: ; %Flow8 +; UNROLL3-NEXT: .LBB6_7: ; %Flow6 ; UNROLL3-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; UNROLL3-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll b/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll index 272daa9dd0b59..dd5c247f6ef35 100644 --- a/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll +++ b/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll @@ -460,10 +460,10 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] ; CHECK-NEXT: s_xor_b32 s7, exec_lo, s6 ; CHECK-NEXT: s_cbranch_execnz .LBB3_3 -; CHECK-NEXT: ; %bb.1: ; %Flow34 +; CHECK-NEXT: ; %bb.1: ; %Flow36 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7 ; CHECK-NEXT: s_cbranch_execnz .LBB3_10 -; CHECK-NEXT: .LBB3_2: ; %Flow35 +; CHECK-NEXT: .LBB3_2: ; %Flow37 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -494,7 +494,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v11, null, 0, v11, s6 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; CHECK-NEXT: s_cbranch_execnz .LBB3_5 -; CHECK-NEXT: .LBB3_6: ; %Flow29 +; CHECK-NEXT: .LBB3_6: ; %Flow31 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: s_and_saveexec_b32 s8, s4 ; 
CHECK-NEXT: s_cbranch_execz .LBB3_9 @@ -520,7 +520,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s6 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; CHECK-NEXT: s_cbranch_execnz .LBB3_8 -; CHECK-NEXT: .LBB3_9: ; %Flow27 +; CHECK-NEXT: .LBB3_9: ; %Flow29 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7 ; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 @@ -556,7 +556,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v5, null, -1, v5, s5 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: s_cbranch_execnz .LBB3_12 -; CHECK-NEXT: .LBB3_13: ; %Flow33 +; CHECK-NEXT: .LBB3_13: ; %Flow35 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7 ; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo ; CHECK-NEXT: s_cbranch_execz .LBB3_16 @@ -584,7 +584,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[8:11] ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s7 ; CHECK-NEXT: s_cbranch_execnz .LBB3_15 -; CHECK-NEXT: .LBB3_16: ; %Flow31 +; CHECK-NEXT: .LBB3_16: ; %Flow33 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) @@ -907,10 +907,10 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align ; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] ; CHECK-NEXT: s_xor_b32 s7, exec_lo, s6 ; CHECK-NEXT: s_cbranch_execnz .LBB6_3 -; CHECK-NEXT: ; %bb.1: ; %Flow41 +; CHECK-NEXT: ; %bb.1: ; %Flow39 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7 ; CHECK-NEXT: s_cbranch_execnz .LBB6_10 -; CHECK-NEXT: .LBB6_2: ; %Flow42 +; CHECK-NEXT: .LBB6_2: ; %Flow40 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6 ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .LBB6_3: ; %memmove_copy_forward @@ -940,7 +940,7 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, 
ptr addrspace(1) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v11, null, 0, v11, s6 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; CHECK-NEXT: s_cbranch_execnz .LBB6_5 -; CHECK-NEXT: .LBB6_6: ; %Flow36 +; CHECK-NEXT: .LBB6_6: ; %Flow34 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: s_and_saveexec_b32 s8, s4 ; CHECK-NEXT: s_cbranch_execz .LBB6_9 @@ -966,11 +966,11 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s6 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; CHECK-NEXT: s_cbranch_execnz .LBB6_8 -; CHECK-NEXT: .LBB6_9: ; %Flow34 +; CHECK-NEXT: .LBB6_9: ; %Flow32 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7 -; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9 ; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7 @@ -1002,15 +1002,15 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v5, null, -1, v5, s5 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: s_cbranch_execnz .LBB6_12 -; CHECK-NEXT: .LBB6_13: ; %Flow40 +; CHECK-NEXT: .LBB6_13: ; %Flow38 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7 ; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo ; CHECK-NEXT: s_cbranch_execz .LBB6_16 ; CHECK-NEXT: ; %bb.14: ; %memmove_bwd_main_loop.preheader -; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16 -; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, -16 ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16 +; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo ; CHECK-NEXT: s_mov_b32 s7, 0 ; CHECK-NEXT: .p2align 6 ; CHECK-NEXT: .LBB6_15: ; %memmove_bwd_main_loop @@ -1030,7 +1030,7 @@ define void @memmove_p1_p1(ptr 
addrspace(1) align 1 %dst, ptr addrspace(1) align ; CHECK-NEXT: global_store_dwordx4 v[12:13], v[8:11], off ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s7 ; CHECK-NEXT: s_cbranch_execnz .LBB6_15 -; CHECK-NEXT: .LBB6_16: ; %Flow38 +; CHECK-NEXT: .LBB6_16: ; %Flow36 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1181,8 +1181,8 @@ define void @memmove_p1_p4(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: .LBB8_9: ; %Flow31 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7 -; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9 ; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7 @@ -1219,10 +1219,10 @@ define void @memmove_p1_p4(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo ; CHECK-NEXT: s_cbranch_execz .LBB8_16 ; CHECK-NEXT: ; %bb.14: ; %memmove_bwd_main_loop.preheader -; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16 -; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, -16 ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16 +; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo ; CHECK-NEXT: s_mov_b32 s7, 0 ; CHECK-NEXT: .p2align 6 ; CHECK-NEXT: .LBB8_15: ; %memmove_bwd_main_loop diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll index 9f62477ae01df..af0942e99182d 100644 --- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll @@ -56,155 +56,153 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: .cfi_offset v29, -240 ; CHECK-NEXT: .cfi_offset v30, -224 ; CHECK-NEXT: .cfi_offset v31, -208 +; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 2, 728(1) +; CHECK-NEXT: ld 14, 688(1) +; CHECK-NEXT: ld 11, 704(1) +; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 21, 5 +; CHECK-NEXT: lwa 5, 0(7) +; CHECK-NEXT: ld 7, 720(1) ; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill ; CHECK-NEXT: std 23, 472(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 22, 5 -; CHECK-NEXT: ld 5, 848(1) +; CHECK-NEXT: mr 22, 6 +; CHECK-NEXT: ld 6, 848(1) ; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: mr 11, 7 -; CHECK-NEXT: ld 23, 688(1) -; CHECK-NEXT: ld 7, 728(1) +; CHECK-NEXT: ld 15, 736(1) ; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill ; CHECK-NEXT: std 19, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 18, 6 -; CHECK-NEXT: li 6, 9 ; CHECK-NEXT: ld 19, 768(1) -; CHECK-NEXT: ld 2, 760(1) -; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill -; CHECK-NEXT: cmpldi 3, 9 -; CHECK-NEXT: ld 27, 816(1) -; CHECK-NEXT: ld 26, 808(1) -; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 15, 736(1) -; CHECK-NEXT: lxv 39, 0(8) +; CHECK-NEXT: ld 18, 760(1) ; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill ; CHECK-NEXT: std 31, 536(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 30, 704(1) -; CHECK-NEXT: lxv 38, 0(9) -; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 21, 784(1) +; CHECK-NEXT: ld 12, 696(1) +; CHECK-NEXT: lxv 0, 0(9) +; CHECK-NEXT: std 9, 64(1) # 8-byte Folded Spill +; CHECK-NEXT: std 10, 72(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 1, 0(8) +; CHECK-NEXT: cmpldi 3, 9 +; CHECK-NEXT: ld 30, 824(1) +; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill +; 
CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 29, 840(1) +; CHECK-NEXT: ld 28, 832(1) +; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 23, 784(1) ; CHECK-NEXT: ld 20, 776(1) ; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill ; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill -; CHECK-NEXT: iselgt 3, 3, 6 -; CHECK-NEXT: ld 6, 720(1) +; CHECK-NEXT: ld 25, 800(1) ; CHECK-NEXT: ld 24, 792(1) -; CHECK-NEXT: std 10, 72(1) # 8-byte Folded Spill -; CHECK-NEXT: std 7, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 27, 816(1) +; CHECK-NEXT: ld 26, 808(1) +; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 17, 752(1) +; CHECK-NEXT: extswsli 9, 5, 3 +; CHECK-NEXT: lxv 4, 0(14) +; CHECK-NEXT: std 14, 32(1) # 8-byte Folded Spill +; CHECK-NEXT: std 12, 40(1) # 8-byte Folded Spill +; CHECK-NEXT: mulli 0, 5, 40 +; CHECK-NEXT: sldi 14, 5, 5 +; CHECK-NEXT: mulli 31, 5, 24 +; CHECK-NEXT: lxv 38, 0(2) +; CHECK-NEXT: lxv 2, 0(11) +; CHECK-NEXT: std 2, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 88(1) # 8-byte Folded Spill +; CHECK-NEXT: mulli 2, 5, 48 +; CHECK-NEXT: sldi 5, 5, 4 +; CHECK-NEXT: ld 16, 744(1) +; CHECK-NEXT: lxv 5, 0(10) +; CHECK-NEXT: std 6, 200(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 192(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 6, 712(1) +; CHECK-NEXT: mr 10, 7 +; CHECK-NEXT: add 7, 14, 21 +; CHECK-NEXT: lxv 13, 0(19) +; CHECK-NEXT: std 8, 48(1) # 8-byte Folded Spill +; CHECK-NEXT: std 6, 56(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 8, 11 +; CHECK-NEXT: li 11, 9 +; CHECK-NEXT: iselgt 3, 3, 11 ; CHECK-NEXT: addi 3, 3, -2 -; CHECK-NEXT: lxv 6, 0(19) -; CHECK-NEXT: lxv 11, 0(7) -; CHECK-NEXT: std 5, 200(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 40(1) # 8-byte Folded Spill -; CHECK-NEXT: 
std 6, 48(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 5, 840(1) -; CHECK-NEXT: lxv 12, 0(6) -; CHECK-NEXT: rldicl 12, 3, 61, 3 +; CHECK-NEXT: rldicl 11, 3, 61, 3 +; CHECK-NEXT: lxv 3, 0(12) +; CHECK-NEXT: lxv 40, 0(6) +; CHECK-NEXT: std 18, 112(1) # 8-byte Folded Spill ; CHECK-NEXT: std 19, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: add 19, 21, 5 +; CHECK-NEXT: ld 5, 200(1) # 8-byte Folded Reload +; CHECK-NEXT: lxv 39, 0(10) +; CHECK-NEXT: addi 3, 7, 32 +; CHECK-NEXT: add 12, 31, 21 ; CHECK-NEXT: std 20, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 4, 0(21) -; CHECK-NEXT: ld 25, 800(1) -; CHECK-NEXT: lxv 33, 0(10) -; CHECK-NEXT: lxv 32, 0(23) -; CHECK-NEXT: lxv 36, 0(30) -; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 17, 752(1) -; CHECK-NEXT: ld 16, 744(1) -; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 29, 712(1) -; CHECK-NEXT: ld 28, 696(1) -; CHECK-NEXT: std 8, 56(1) # 8-byte Folded Spill -; CHECK-NEXT: std 9, 64(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 37, 0(28) -; CHECK-NEXT: lxv 13, 0(29) -; CHECK-NEXT: mr 8, 29 -; CHECK-NEXT: mr 9, 30 -; CHECK-NEXT: mr 10, 28 -; CHECK-NEXT: std 25, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 33, 0(15) +; CHECK-NEXT: lxv 32, 0(16) ; CHECK-NEXT: std 26, 160(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 10, 0(15) -; CHECK-NEXT: lxv 9, 0(16) -; CHECK-NEXT: li 28, 1 -; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 8, 0(17) -; CHECK-NEXT: lxv 7, 0(2) +; CHECK-NEXT: std 27, 168(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 37, 0(17) +; CHECK-NEXT: lxv 36, 0(18) +; CHECK-NEXT: std 30, 176(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, 184(1) # 8-byte 
Folded Spill +; CHECK-NEXT: lxv 12, 0(20) +; CHECK-NEXT: lxv 11, 0(23) +; CHECK-NEXT: add 20, 21, 9 ; CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 5, 0(20) -; CHECK-NEXT: lxv 3, 0(24) +; CHECK-NEXT: lxv 10, 0(24) +; CHECK-NEXT: lxv 9, 0(25) ; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 2, 0(25) -; CHECK-NEXT: lxv 1, 0(26) +; CHECK-NEXT: lxv 8, 0(26) +; CHECK-NEXT: lxv 7, 0(27) +; CHECK-NEXT: addi 12, 12, 32 +; CHECK-NEXT: li 27, 0 +; CHECK-NEXT: mr 26, 21 ; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill -; CHECK-NEXT: lxv 0, 0(27) +; CHECK-NEXT: lxv 6, 0(30) +; CHECK-NEXT: lxv 41, 0(28) +; CHECK-NEXT: addi 7, 11, 1 +; CHECK-NEXT: add 11, 0, 21 +; CHECK-NEXT: li 28, 1 ; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 55, 256(1) # 16-byte Folded Spill +; CHECK-NEXT: lxv 43, 0(29) +; CHECK-NEXT: lxv 42, 0(5) ; CHECK-NEXT: stxv 56, 272(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill +; CHECK-NEXT: addi 11, 11, 32 ; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill -; CHECK-NEXT: std 5, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 5, 832(1) ; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill -; CHECK-NEXT: std 15, 88(1) # 8-byte Folded Spill ; CHECK-NEXT: std 16, 96(1) # 8-byte Folded Spill ; CHECK-NEXT: std 17, 104(1) # 8-byte Folded Spill -; CHECK-NEXT: std 2, 112(1) # 8-byte Folded Spill -; CHECK-NEXT: std 5, 184(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 5, 824(1) -; CHECK-NEXT: std 5, 176(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: lwa 5, 0(11) -; 
CHECK-NEXT: li 27, 0 -; CHECK-NEXT: ld 7, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: mulli 6, 5, 40 -; CHECK-NEXT: sldi 0, 5, 4 -; CHECK-NEXT: extswsli 14, 5, 3 -; CHECK-NEXT: lxv 40, 0(7) -; CHECK-NEXT: ld 7, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: add 31, 14, 22 -; CHECK-NEXT: add 11, 0, 22 -; CHECK-NEXT: mr 26, 22 -; CHECK-NEXT: addi 3, 11, 32 -; CHECK-NEXT: addi 11, 12, 1 -; CHECK-NEXT: mulli 12, 5, 48 -; CHECK-NEXT: addi 31, 31, 32 -; CHECK-NEXT: add 19, 22, 6 -; CHECK-NEXT: sldi 6, 5, 5 -; CHECK-NEXT: mulli 5, 5, 24 -; CHECK-NEXT: lxv 41, 0(7) -; CHECK-NEXT: add 20, 22, 6 -; CHECK-NEXT: add 21, 22, 5 -; CHECK-NEXT: ld 5, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 43, 0(5) -; CHECK-NEXT: ld 5, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 42, 0(5) +; CHECK-NEXT: std 24, 144(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 152(1) # 8-byte Folded Spill ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_4 Depth 2 -; CHECK-NEXT: maddld 5, 12, 27, 0 -; CHECK-NEXT: mr 6, 18 -; CHECK-NEXT: mr 29, 21 +; CHECK-NEXT: maddld 5, 2, 27, 0 +; CHECK-NEXT: mr 6, 22 ; CHECK-NEXT: mr 30, 20 -; CHECK-NEXT: mr 2, 19 -; CHECK-NEXT: mtctr 11 -; CHECK-NEXT: add 25, 22, 5 -; CHECK-NEXT: maddld 5, 12, 27, 14 -; CHECK-NEXT: add 24, 22, 5 +; CHECK-NEXT: mr 29, 19 +; CHECK-NEXT: mtctr 7 +; CHECK-NEXT: add 25, 21, 5 +; CHECK-NEXT: maddld 5, 2, 27, 14 +; CHECK-NEXT: add 24, 21, 5 +; CHECK-NEXT: maddld 5, 2, 27, 31 +; CHECK-NEXT: add 23, 21, 5 ; CHECK-NEXT: mr 5, 26 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_4: # %_loop_2_do_ @@ -212,66 +210,66 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lxvp 34, 0(6) ; CHECK-NEXT: lxvp 44, 0(5) -; CHECK-NEXT: xvmaddadp 39, 45, 35 -; CHECK-NEXT: lxvp 46, 0(24) -; CHECK-NEXT: xvmaddadp 38, 47, 35 -; CHECK-NEXT: lxvp 48, 0(25) -; CHECK-NEXT: lxvp 50, 0(29) -; CHECK-NEXT: lxvp 62, 0(30) -; CHECK-NEXT: lxvp 60, 0(2) +; CHECK-NEXT: xvmaddadp 1, 45, 35 +; CHECK-NEXT: lxvp 46, 0(30) +; CHECK-NEXT: xvmaddadp 0, 47, 35 +; CHECK-NEXT: lxvp 48, 0(29) +; CHECK-NEXT: lxvp 50, 0(23) +; CHECK-NEXT: lxvp 62, 0(24) +; CHECK-NEXT: lxvp 60, 0(25) ; CHECK-NEXT: lxvp 58, 32(6) ; CHECK-NEXT: lxvp 56, 32(5) -; CHECK-NEXT: lxvp 54, 32(24) -; CHECK-NEXT: lxvp 52, 32(25) -; CHECK-NEXT: lxvp 30, 32(29) -; CHECK-NEXT: lxvp 28, 32(30) -; CHECK-NEXT: lxvp 26, 32(2) -; CHECK-NEXT: xvmaddadp 33, 49, 35 -; CHECK-NEXT: xvmaddadp 32, 51, 35 -; CHECK-NEXT: xvmaddadp 37, 63, 35 -; CHECK-NEXT: xvmaddadp 36, 61, 35 -; CHECK-NEXT: xvmaddadp 13, 44, 34 -; CHECK-NEXT: xvmaddadp 12, 46, 34 -; CHECK-NEXT: xvmaddadp 11, 48, 34 -; CHECK-NEXT: xvmaddadp 10, 50, 34 -; CHECK-NEXT: xvmaddadp 9, 62, 34 -; CHECK-NEXT: xvmaddadp 8, 60, 34 -; CHECK-NEXT: xvmaddadp 7, 57, 59 -; CHECK-NEXT: xvmaddadp 6, 55, 59 -; CHECK-NEXT: xvmaddadp 5, 53, 59 -; CHECK-NEXT: xvmaddadp 4, 31, 59 -; CHECK-NEXT: xvmaddadp 3, 29, 59 -; CHECK-NEXT: xvmaddadp 2, 27, 59 -; CHECK-NEXT: xvmaddadp 1, 56, 58 -; CHECK-NEXT: xvmaddadp 0, 54, 58 -; CHECK-NEXT: xvmaddadp 40, 52, 58 +; CHECK-NEXT: lxvp 54, 32(30) +; CHECK-NEXT: lxvp 52, 32(29) +; CHECK-NEXT: lxvp 30, 32(23) +; CHECK-NEXT: lxvp 28, 32(24) +; CHECK-NEXT: lxvp 26, 32(25) +; CHECK-NEXT: xvmaddadp 5, 49, 35 +; CHECK-NEXT: xvmaddadp 4, 51, 35 +; CHECK-NEXT: xvmaddadp 3, 63, 35 +; CHECK-NEXT: xvmaddadp 2, 61, 35 +; CHECK-NEXT: xvmaddadp 40, 44, 34 +; CHECK-NEXT: xvmaddadp 39, 46, 34 +; CHECK-NEXT: xvmaddadp 38, 48, 34 +; CHECK-NEXT: xvmaddadp 33, 50, 34 +; CHECK-NEXT: xvmaddadp 32, 62, 34 +; CHECK-NEXT: xvmaddadp 37, 60, 34 +; CHECK-NEXT: xvmaddadp 36, 57, 59 +; CHECK-NEXT: 
xvmaddadp 13, 55, 59 +; CHECK-NEXT: xvmaddadp 12, 53, 59 +; CHECK-NEXT: xvmaddadp 11, 31, 59 +; CHECK-NEXT: xvmaddadp 10, 29, 59 +; CHECK-NEXT: xvmaddadp 9, 27, 59 +; CHECK-NEXT: xvmaddadp 8, 56, 58 +; CHECK-NEXT: xvmaddadp 7, 54, 58 +; CHECK-NEXT: xvmaddadp 6, 52, 58 ; CHECK-NEXT: xvmaddadp 41, 30, 58 ; CHECK-NEXT: xvmaddadp 43, 28, 58 ; CHECK-NEXT: xvmaddadp 42, 26, 58 ; CHECK-NEXT: addi 6, 6, 64 ; CHECK-NEXT: addi 5, 5, 64 +; CHECK-NEXT: addi 30, 30, 64 +; CHECK-NEXT: addi 29, 29, 64 +; CHECK-NEXT: addi 23, 23, 64 ; CHECK-NEXT: addi 24, 24, 64 ; CHECK-NEXT: addi 25, 25, 64 -; CHECK-NEXT: addi 29, 29, 64 -; CHECK-NEXT: addi 30, 30, 64 -; CHECK-NEXT: addi 2, 2, 64 ; CHECK-NEXT: bdnz .LBB0_4 ; CHECK-NEXT: # %bb.5: # %_loop_2_endl_ ; CHECK-NEXT: # ; CHECK-NEXT: addi 28, 28, 6 -; CHECK-NEXT: add 26, 26, 12 -; CHECK-NEXT: add 31, 31, 12 -; CHECK-NEXT: add 19, 19, 12 -; CHECK-NEXT: add 3, 3, 12 -; CHECK-NEXT: add 20, 20, 12 -; CHECK-NEXT: add 21, 21, 12 +; CHECK-NEXT: add 26, 26, 2 +; CHECK-NEXT: add 20, 20, 2 +; CHECK-NEXT: add 11, 11, 2 +; CHECK-NEXT: add 19, 19, 2 +; CHECK-NEXT: add 3, 3, 2 +; CHECK-NEXT: add 12, 12, 2 ; CHECK-NEXT: addi 27, 27, 1 ; CHECK-NEXT: cmpld 28, 4 ; CHECK-NEXT: ble 0, .LBB0_3 ; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit -; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload ; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload -; CHECK-NEXT: stxv 39, 0(3) +; CHECK-NEXT: stxv 1, 0(3) ; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload ; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload @@ -284,7 +282,7 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload -; CHECK-NEXT: stxv 38, 0(3) +; CHECK-NEXT: stxv 0, 0(3) ; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload ; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload ; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload @@ -297,8 +295,8 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. ; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 33, 0(3) -; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 5, 0(3) +; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload @@ -310,40 +308,41 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 32, 0(3) -; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 37, 0(10) -; CHECK-NEXT: stxv 36, 0(9) -; CHECK-NEXT: stxv 13, 0(8) +; CHECK-NEXT: stxv 4, 0(3) +; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 12, 0(3) +; CHECK-NEXT: stxv 3, 0(3) +; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 2, 0(8) +; CHECK-NEXT: stxv 40, 0(3) ; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 11, 0(3) +; CHECK-NEXT: stxv 39, 0(10) +; CHECK-NEXT: stxv 38, 0(3) ; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 10, 0(3) +; CHECK-NEXT: stxv 33, 0(3) ; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 9, 0(3) +; CHECK-NEXT: stxv 32, 0(3) ; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 8, 0(3) +; CHECK-NEXT: stxv 37, 0(3) ; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 7, 0(3) +; CHECK-NEXT: stxv 36, 0(3) ; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 6, 0(3) +; CHECK-NEXT: stxv 13, 0(3) ; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 5, 0(3) +; CHECK-NEXT: stxv 12, 0(3) ; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 4, 0(3) +; CHECK-NEXT: stxv 11, 0(3) ; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 3, 0(3) +; CHECK-NEXT: stxv 10, 0(3) ; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 2, 0(3) +; CHECK-NEXT: stxv 9, 0(3) ; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 1, 0(3) +; CHECK-NEXT: stxv 8, 0(3) ; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 0, 0(3) +; CHECK-NEXT: stxv 7, 0(3) ; CHECK-NEXT: ld 3, 
176(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 40, 0(3) +; CHECK-NEXT: stxv 6, 0(3) ; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 41, 0(3) ; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll index 799ba63a4df27..8fb4c21316b48 100644 --- a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll +++ b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll @@ -40,9 +40,10 @@ define signext i32 @test(ptr noalias %PtrA, ptr noalias %PtrB, i32 signext %LenA ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_4: # %if.end9 ; CHECK-NEXT: # -; CHECK-NEXT: lwzx 10, 6, 9 +; CHECK-NEXT: add 9, 3, 9 +; CHECK-NEXT: lwz 10, 4(9) ; CHECK-NEXT: addi 10, 10, 1 -; CHECK-NEXT: stwx 10, 6, 9 +; CHECK-NEXT: stw 10, 4(9) ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_5: # %if.then ; CHECK-NEXT: lwax 3, 9, 3 diff --git a/llvm/test/Transforms/LICM/gep-reassociate.ll b/llvm/test/Transforms/LICM/gep-reassociate.ll index 630a751999c49..0090c76b09dda 100644 --- a/llvm/test/Transforms/LICM/gep-reassociate.ll +++ b/llvm/test/Transforms/LICM/gep-reassociate.ll @@ -39,11 +39,13 @@ exit: ret void } -define void @both_inbounds_one_neg(ptr %ptr, i1 %c) { +define void @both_inbounds_one_neg(ptr %ptr, i1 %c, i64 %neg) { ; CHECK-LABEL: define void @both_inbounds_one_neg -; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { +; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[NEG:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 -1 +; CHECK-NEXT: [[IS_NEG:%.*]] = icmp slt i64 [[NEG]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NEG]]) +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[NEG]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() @@ -55,13 +57,15 @@ define void @both_inbounds_one_neg(ptr %ptr, i1 
%c) { ; CHECK-NEXT: ret void ; entry: + %is.neg = icmp slt i64 %neg, 0 + call void @llvm.assume(i1 %is.neg) br label %loop loop: %val = call i32 @get.i32() %val.ext = zext i32 %val to i64 %ptr2 = getelementptr inbounds i8, ptr %ptr, i64 %val.ext - %ptr3 = getelementptr i8, ptr %ptr2, i64 -1 + %ptr3 = getelementptr i8, ptr %ptr2, i64 %neg call void @use(ptr %ptr3) br i1 %c, label %loop, label %exit @@ -69,11 +73,13 @@ exit: ret void } -define void @both_inbounds_pos(ptr %ptr, i1 %c) { +define void @both_inbounds_pos(ptr %ptr, i1 %c, i64 %nonneg) { ; CHECK-LABEL: define void @both_inbounds_pos -; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { +; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[NONNEG:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 +; CHECK-NEXT: [[IS_NONNEG:%.*]] = icmp sge i64 [[NONNEG]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NONNEG]]) +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[NONNEG]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() @@ -85,13 +91,15 @@ define void @both_inbounds_pos(ptr %ptr, i1 %c) { ; CHECK-NEXT: ret void ; entry: + %is.nonneg = icmp sge i64 %nonneg, 0 + call void @llvm.assume(i1 %is.nonneg) br label %loop loop: %val = call i32 @get.i32() %val.ext = zext i32 %val to i64 %ptr2 = getelementptr inbounds i8, ptr %ptr, i64 %val.ext - %ptr3 = getelementptr inbounds i8, ptr %ptr2, i64 1 + %ptr3 = getelementptr inbounds i8, ptr %ptr2, i64 %nonneg call void @use(ptr %ptr3) br i1 %c, label %loop, label %exit @@ -440,3 +448,32 @@ latch: exit: ret void } + +; Do not reassociate constant offset GEP. 
+define void @constant_offset(ptr %ptr, i1 %c) { +; CHECK-LABEL: define void @constant_offset +; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64() +; CHECK-NEXT: [[GEP_BASE:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[VAL]] +; CHECK-NEXT: [[GEP_OFF:%.*]] = getelementptr i8, ptr [[GEP_BASE]], i64 1 +; CHECK-NEXT: call void @use(ptr [[GEP_OFF]]) +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %val = call i64 @get.i64() + %gep.base = getelementptr i8, ptr %ptr, i64 %val + %gep.off = getelementptr i8, ptr %gep.base, i64 1 + call void @use(ptr %gep.off) + br i1 %c, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index e89f41bb94665..97b5210b22f92 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -142,40 +142,40 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) { ; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] ; AUTO_VEC: for.body.preheader.new: ; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 2147483640 -; AUTO_VEC-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 4 -; AUTO_VEC-NEXT: [[INVARIANT_GEP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 -; AUTO_VEC-NEXT: [[INVARIANT_GEP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 12 -; AUTO_VEC-NEXT: [[INVARIANT_GEP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16 -; AUTO_VEC-NEXT: [[INVARIANT_GEP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 20 -; AUTO_VEC-NEXT: [[INVARIANT_GEP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 24 -; AUTO_VEC-NEXT: 
[[INVARIANT_GEP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 28 ; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]] ; AUTO_VEC: for.body: ; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ 1.000000e+00, [[FOR_BODY_PREHEADER_NEW]] ], [ [[CONV1_7:%.*]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ] -; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; AUTO_VEC-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 ; AUTO_VEC-NEXT: [[CONV1:%.*]] = fadd float [[X_06]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 ; AUTO_VEC-NEXT: store float [[CONV1]], ptr [[ARRAYIDX_1]], align 4 ; AUTO_VEC-NEXT: [[CONV1_1:%.*]] = fadd float [[CONV1]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP1]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8 ; AUTO_VEC-NEXT: store float [[CONV1_1]], ptr [[ARRAYIDX_2]], align 4 ; AUTO_VEC-NEXT: [[CONV1_2:%.*]] = fadd float [[CONV1_1]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP3]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw i8, ptr 
[[TMP3]], i64 12 ; AUTO_VEC-NEXT: store float [[CONV1_2]], ptr [[ARRAYIDX_3]], align 4 ; AUTO_VEC-NEXT: [[CONV1_3:%.*]] = fadd float [[CONV1_2]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP5]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16 ; AUTO_VEC-NEXT: store float [[CONV1_3]], ptr [[ARRAYIDX_4]], align 4 ; AUTO_VEC-NEXT: [[CONV1_4:%.*]] = fadd float [[CONV1_3]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP7]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 20 ; AUTO_VEC-NEXT: store float [[CONV1_4]], ptr [[ARRAYIDX_5]], align 4 ; AUTO_VEC-NEXT: [[CONV1_5:%.*]] = fadd float [[CONV1_4]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP9]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 24 ; AUTO_VEC-NEXT: store float [[CONV1_5]], ptr [[ARRAYIDX_6]], align 4 ; AUTO_VEC-NEXT: [[CONV1_6:%.*]] = fadd float [[CONV1_5]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP11]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 28 ; AUTO_VEC-NEXT: store float [[CONV1_6]], ptr [[ARRAYIDX_7]], align 4 ; AUTO_VEC-NEXT: [[CONV1_7]] = fadd float [[CONV1_6]], 5.000000e-01 ; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8 
@@ -299,40 +299,40 @@ define double @external_use_without_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] ; AUTO_VEC: entry.new: ; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[SMAX]], 9223372036854775800 -; AUTO_VEC-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 8 -; AUTO_VEC-NEXT: [[INVARIANT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 16 -; AUTO_VEC-NEXT: [[INVARIANT_GEP4:%.*]] = getelementptr i8, ptr [[A]], i64 24 -; AUTO_VEC-NEXT: [[INVARIANT_GEP6:%.*]] = getelementptr i8, ptr [[A]], i64 32 -; AUTO_VEC-NEXT: [[INVARIANT_GEP8:%.*]] = getelementptr i8, ptr [[A]], i64 40 -; AUTO_VEC-NEXT: [[INVARIANT_GEP10:%.*]] = getelementptr i8, ptr [[A]], i64 48 -; AUTO_VEC-NEXT: [[INVARIANT_GEP12:%.*]] = getelementptr i8, ptr [[A]], i64 56 ; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]] ; AUTO_VEC: for.body: ; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[I_NEXT_7:%.*]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ 0.000000e+00, [[ENTRY_NEW]] ], [ [[J_NEXT_7:%.*]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ] -; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[I]] ; AUTO_VEC-NEXT: store double [[J]], ptr [[T0]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT:%.*]] = fadd double [[J]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_1:%.*]] = getelementptr double, ptr [[INVARIANT_GEP]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_1:%.*]] = getelementptr i8, ptr [[TMP1]], i64 8 ; AUTO_VEC-NEXT: store double [[J_NEXT]], ptr [[T0_1]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_1:%.*]] = fadd double [[J_NEXT]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_2:%.*]] = getelementptr double, ptr [[INVARIANT_GEP2]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, ptr 
[[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_2:%.*]] = getelementptr i8, ptr [[TMP2]], i64 16 ; AUTO_VEC-NEXT: store double [[J_NEXT_1]], ptr [[T0_2]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_2:%.*]] = fadd double [[J_NEXT_1]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_3:%.*]] = getelementptr double, ptr [[INVARIANT_GEP4]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_3:%.*]] = getelementptr i8, ptr [[TMP3]], i64 24 ; AUTO_VEC-NEXT: store double [[J_NEXT_2]], ptr [[T0_3]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_3:%.*]] = fadd double [[J_NEXT_2]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_4:%.*]] = getelementptr double, ptr [[INVARIANT_GEP6]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_4:%.*]] = getelementptr i8, ptr [[TMP4]], i64 32 ; AUTO_VEC-NEXT: store double [[J_NEXT_3]], ptr [[T0_4]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_4:%.*]] = fadd double [[J_NEXT_3]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_5:%.*]] = getelementptr double, ptr [[INVARIANT_GEP8]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_5:%.*]] = getelementptr i8, ptr [[TMP5]], i64 40 ; AUTO_VEC-NEXT: store double [[J_NEXT_4]], ptr [[T0_5]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_5:%.*]] = fadd double [[J_NEXT_4]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_6:%.*]] = getelementptr double, ptr [[INVARIANT_GEP10]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_6:%.*]] = getelementptr i8, ptr [[TMP6]], i64 48 ; AUTO_VEC-NEXT: store double [[J_NEXT_5]], ptr [[T0_6]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_6:%.*]] = fadd double [[J_NEXT_5]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_7:%.*]] = getelementptr double, ptr [[INVARIANT_GEP12]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_7:%.*]] = getelementptr i8, ptr [[TMP7]], i64 56 ; AUTO_VEC-NEXT: 
store double [[J_NEXT_6]], ptr [[T0_7]], align 8 ; AUTO_VEC-NEXT: [[I_NEXT_7]] = add nuw nsw i64 [[I]], 8 ; AUTO_VEC-NEXT: [[J_NEXT_7]] = fadd double [[J_NEXT_6]], 3.000000e+00 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll index c98e7d349e6c0..482907d295706 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll @@ -8,12 +8,12 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize { ; CHECK-LABEL: define void @foo( ; CHECK-SAME: ptr noalias noundef readonly captures(none) [[TMP0:%.*]], ptr noalias noundef writeonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -28 ; CHECK-NEXT: br label [[TMP4:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP4]] ] ; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i64 [[TMP3]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 -28 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 5) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32>