From b5c4524e08eec9958899fcaa4f98df27fddf9fe0 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 4 Aug 2025 08:22:48 +0100 Subject: [PATCH] [AArch64][GlobalISel] Add a constant funnel shift post-legalizer combine. We want to be able to produce extr instructions post-legalization. They are legal for scalars, acting as funnel shifts with a constant shift amount. Unfortunately I'm not sure if there is a way currently to represent that in the legalization rules, but it might be useful for several operations - to be able to treat and test operations with constant operands as legal or not. This adds a change to the existing matchOrShiftToFunnelShift so that AArch64 can generate such instructions post-legalization provided that the operation is scalar and the shift amount is constant. It doesn't feel like the best solution - any thoughts on alternatives? --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 +- .../include/llvm/Target/GlobalISel/Combine.td | 10 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 9 +- llvm/lib/Target/AArch64/AArch64Combine.td | 3 +- llvm/test/CodeGen/AArch64/adc.ll | 6 +- llvm/test/CodeGen/AArch64/fsh.ll | 473 ++++++++---------- llvm/test/CodeGen/AArch64/funnel-shift.ll | 55 +- llvm/test/CodeGen/AArch64/rem-by-const.ll | 173 +++---- 8 files changed, 340 insertions(+), 392 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index da829046cc421..9051fd0e4474c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -627,7 +627,8 @@ class CombinerHelper { /// This variant does not erase \p MI after calling the build function. 
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const; - bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo) const; + bool matchOrShiftToFunnelShift(MachineInstr &MI, bool ScalarConstantsAreLegal, + BuildFnTy &MatchInfo) const; bool matchFunnelShiftToRotate(MachineInstr &MI) const; void applyFunnelShiftToRotate(MachineInstr &MI) const; bool matchRotateOutOfRange(MachineInstr &MI) const; diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index b619de39a8c75..c417da7c8b88f 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1000,10 +1000,18 @@ def extract_vec_elt_combines : GICombineGroup<[ def funnel_shift_from_or_shift : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), (match (wip_match_opcode G_OR):$root, - [{ return Helper.matchOrShiftToFunnelShift(*${root}, ${info}); }]), + [{ return Helper.matchOrShiftToFunnelShift(*${root}, false, ${info}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${info}); }]) >; +def funnel_shift_from_or_shift_constants_are_legal : GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_OR):$root, + [{ return Helper.matchOrShiftToFunnelShift(*${root}, true, ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }]) +>; + + def funnel_shift_to_rotate : GICombineRule< (defs root:$root), (match (wip_match_opcode G_FSHL, G_FSHR):$root, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index e84ba91c47c8b..80a58b0bf2858 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -4390,6 +4390,7 @@ void CombinerHelper::applyBuildFnNoErase( } bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI, + bool ScalarConstantsAreLegal, BuildFnTy &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_OR); @@ 
-4409,31 +4410,29 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI, // Given constants C0 and C1 such that C0 + C1 is bit-width: // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1) - int64_t CstShlAmt, CstLShrAmt; + int64_t CstShlAmt = 0, CstLShrAmt; if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) && mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) && CstShlAmt + CstLShrAmt == BitWidth) { FshOpc = TargetOpcode::G_FSHR; Amt = LShrAmt; - } else if (mi_match(LShrAmt, MRI, m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) && ShlAmt == Amt) { // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt) FshOpc = TargetOpcode::G_FSHL; - } else if (mi_match(ShlAmt, MRI, m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) && LShrAmt == Amt) { // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt) FshOpc = TargetOpcode::G_FSHR; - } else { return false; } LLT AmtTy = MRI.getType(Amt); - if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}})) + if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) && + (!ScalarConstantsAreLegal || CstShlAmt == 0 || !Ty.isScalar())) return false; MatchInfo = [=](MachineIRBuilder &B) { diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 99f0af5f6a3f8..81de366c32e72 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -367,5 +367,6 @@ def AArch64PostLegalizerCombiner select_to_minmax, or_to_bsp, combine_concat_vector, commute_constant_to_rhs, extract_vec_elt_combines, push_freeze_to_prevent_poison_from_propagating, - combine_mul_cmlt, combine_use_vector_truncate, extmultomull]> { + combine_mul_cmlt, combine_use_vector_truncate, extmultomull, + funnel_shift_from_or_shift_constants_are_legal]> { } diff --git a/llvm/test/CodeGen/AArch64/adc.ll b/llvm/test/CodeGen/AArch64/adc.ll index 12e8bf26c9eac..03f3cf192102d 100644 --- a/llvm/test/CodeGen/AArch64/adc.ll +++ 
b/llvm/test/CodeGen/AArch64/adc.ll @@ -71,9 +71,8 @@ define i128 @test_shifted(i128 %a, i128 %b) { ; ; CHECK-GI-LABEL: test_shifted: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: lsr x8, x2, #19 +; CHECK-GI-NEXT: extr x8, x3, x2, #19 ; CHECK-GI-NEXT: adds x0, x0, x2, lsl #45 -; CHECK-GI-NEXT: orr x8, x8, x3, lsl #45 ; CHECK-GI-NEXT: adc x1, x1, x8 ; CHECK-GI-NEXT: ret %rhs = shl i128 %b, 45 @@ -108,8 +107,7 @@ define i128 @test_extended(i128 %a, i16 %b) { ; CHECK-GI-NEXT: sxth x8, w2 ; CHECK-GI-NEXT: adds x0, x0, w2, sxth #3 ; CHECK-GI-NEXT: asr x9, x8, #63 -; CHECK-GI-NEXT: lsr x8, x8, #61 -; CHECK-GI-NEXT: orr x8, x8, x9, lsl #3 +; CHECK-GI-NEXT: extr x8, x9, x8, #61 ; CHECK-GI-NEXT: adc x1, x1, x8 ; CHECK-GI-NEXT: ret %ext = sext i16 %b to i128 diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll index 4c28c90824028..9eb2e3de2b2b6 100644 --- a/llvm/test/CodeGen/AArch64/fsh.ll +++ b/llvm/test/CodeGen/AArch64/fsh.ll @@ -510,41 +510,40 @@ define i128 @fshl_i128(i128 %a, i128 %b, i128 %c) { ; ; CHECK-GI-LABEL: fshl_i128: ; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #64 // =0x40 ; CHECK-GI-NEXT: and x9, x4, #0x7f -; CHECK-GI-NEXT: mov w10, #64 // =0x40 -; CHECK-GI-NEXT: lsl x14, x3, #63 -; CHECK-GI-NEXT: sub x12, x10, x9 +; CHECK-GI-NEXT: mov w10, #127 // =0x7f +; CHECK-GI-NEXT: sub x12, x8, x9 ; CHECK-GI-NEXT: lsl x13, x1, x9 -; CHECK-GI-NEXT: mov w8, #127 // =0x7f +; CHECK-GI-NEXT: bic x10, x10, x4 ; CHECK-GI-NEXT: lsr x12, x0, x12 -; CHECK-GI-NEXT: bic x8, x8, x4 -; CHECK-GI-NEXT: sub x15, x9, #64 +; CHECK-GI-NEXT: sub x14, x9, #64 +; CHECK-GI-NEXT: lsl x15, x0, x9 +; CHECK-GI-NEXT: extr x16, x3, x2, #1 ; CHECK-GI-NEXT: cmp x9, #64 -; CHECK-GI-NEXT: lsl x9, x0, x9 -; CHECK-GI-NEXT: lsl x15, x0, x15 -; CHECK-GI-NEXT: orr x12, x12, x13 -; CHECK-GI-NEXT: orr x13, x14, x2, lsr #1 -; CHECK-GI-NEXT: lsr x14, x3, #1 -; CHECK-GI-NEXT: sub x10, x10, x8 -; CHECK-GI-NEXT: sub x16, x8, #64 -; CHECK-GI-NEXT: csel x9, x9, xzr, lo -; 
CHECK-GI-NEXT: lsr x17, x13, x8 -; CHECK-GI-NEXT: lsl x10, x14, x10 -; CHECK-GI-NEXT: csel x12, x12, x15, lo +; CHECK-GI-NEXT: sub x8, x8, x10 +; CHECK-GI-NEXT: orr x9, x12, x13 +; CHECK-GI-NEXT: lsr x12, x3, #1 +; CHECK-GI-NEXT: lsl x13, x0, x14 +; CHECK-GI-NEXT: csel x14, x15, xzr, lo +; CHECK-GI-NEXT: sub x15, x10, #64 +; CHECK-GI-NEXT: lsr x17, x16, x10 +; CHECK-GI-NEXT: lsl x8, x12, x8 +; CHECK-GI-NEXT: csel x9, x9, x13, lo ; CHECK-GI-NEXT: tst x4, #0x7f -; CHECK-GI-NEXT: lsr x15, x14, x16 +; CHECK-GI-NEXT: lsr x13, x12, x15 ; CHECK-GI-NEXT: mvn x11, x4 -; CHECK-GI-NEXT: csel x12, x1, x12, eq -; CHECK-GI-NEXT: orr x10, x17, x10 -; CHECK-GI-NEXT: cmp x8, #64 -; CHECK-GI-NEXT: lsr x14, x14, x8 -; CHECK-GI-NEXT: csel x10, x10, x15, lo +; CHECK-GI-NEXT: csel x9, x1, x9, eq +; CHECK-GI-NEXT: orr x8, x17, x8 +; CHECK-GI-NEXT: cmp x10, #64 +; CHECK-GI-NEXT: lsr x12, x12, x10 +; CHECK-GI-NEXT: csel x8, x8, x13, lo ; CHECK-GI-NEXT: tst x11, #0x7f -; CHECK-GI-NEXT: csel x10, x13, x10, eq -; CHECK-GI-NEXT: cmp x8, #64 -; CHECK-GI-NEXT: csel x8, x14, xzr, lo -; CHECK-GI-NEXT: orr x0, x9, x10 -; CHECK-GI-NEXT: orr x1, x12, x8 +; CHECK-GI-NEXT: csel x8, x16, x8, eq +; CHECK-GI-NEXT: cmp x10, #64 +; CHECK-GI-NEXT: csel x10, x12, xzr, lo +; CHECK-GI-NEXT: orr x0, x14, x8 +; CHECK-GI-NEXT: orr x1, x9, x10 ; CHECK-GI-NEXT: ret entry: %d = call i128 @llvm.fshl(i128 %a, i128 %b, i128 %c) @@ -571,41 +570,40 @@ define i128 @fshr_i128(i128 %a, i128 %b, i128 %c) { ; ; CHECK-GI-LABEL: fshr_i128: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: lsr x8, x0, #63 -; CHECK-GI-NEXT: mov w9, #127 // =0x7f -; CHECK-GI-NEXT: mov w10, #64 // =0x40 -; CHECK-GI-NEXT: bic x9, x9, x4 -; CHECK-GI-NEXT: lsl x11, x0, #1 -; CHECK-GI-NEXT: and x12, x4, #0x7f -; CHECK-GI-NEXT: orr x8, x8, x1, lsl #1 -; CHECK-GI-NEXT: sub x14, x10, x9 -; CHECK-GI-NEXT: sub x17, x9, #64 -; CHECK-GI-NEXT: lsl x15, x11, x9 -; CHECK-GI-NEXT: lsr x14, x11, x14 -; CHECK-GI-NEXT: cmp x9, #64 -; CHECK-GI-NEXT: lsl x16, x8, x9 -; 
CHECK-GI-NEXT: sub x9, x10, x12 -; CHECK-GI-NEXT: lsl x10, x11, x17 -; CHECK-GI-NEXT: mvn x13, x4 -; CHECK-GI-NEXT: csel x11, x15, xzr, lo -; CHECK-GI-NEXT: sub x15, x12, #64 -; CHECK-GI-NEXT: orr x14, x14, x16 -; CHECK-GI-NEXT: lsr x16, x2, x12 -; CHECK-GI-NEXT: lsl x9, x3, x9 -; CHECK-GI-NEXT: csel x10, x14, x10, lo -; CHECK-GI-NEXT: tst x13, #0x7f -; CHECK-GI-NEXT: lsr x13, x3, x15 -; CHECK-GI-NEXT: csel x8, x8, x10, eq -; CHECK-GI-NEXT: orr x9, x16, x9 -; CHECK-GI-NEXT: cmp x12, #64 -; CHECK-GI-NEXT: lsr x10, x3, x12 -; CHECK-GI-NEXT: csel x9, x9, x13, lo +; CHECK-GI-NEXT: mov w8, #127 // =0x7f +; CHECK-GI-NEXT: lsl x9, x0, #1 +; CHECK-GI-NEXT: extr x10, x1, x0, #63 +; CHECK-GI-NEXT: bic x8, x8, x4 +; CHECK-GI-NEXT: mov w11, #64 // =0x40 +; CHECK-GI-NEXT: and x14, x4, #0x7f +; CHECK-GI-NEXT: sub x12, x11, x8 +; CHECK-GI-NEXT: lsl x13, x10, x8 +; CHECK-GI-NEXT: lsl x16, x9, x8 +; CHECK-GI-NEXT: lsr x12, x9, x12 +; CHECK-GI-NEXT: sub x17, x8, #64 +; CHECK-GI-NEXT: cmp x8, #64 +; CHECK-GI-NEXT: lsl x8, x9, x17 +; CHECK-GI-NEXT: sub x11, x11, x14 +; CHECK-GI-NEXT: mvn x15, x4 +; CHECK-GI-NEXT: orr x12, x12, x13 +; CHECK-GI-NEXT: csel x9, x16, xzr, lo +; CHECK-GI-NEXT: sub x13, x14, #64 +; CHECK-GI-NEXT: lsr x16, x2, x14 +; CHECK-GI-NEXT: lsl x11, x3, x11 +; CHECK-GI-NEXT: csel x8, x12, x8, lo +; CHECK-GI-NEXT: tst x15, #0x7f +; CHECK-GI-NEXT: lsr x12, x3, x13 +; CHECK-GI-NEXT: csel x8, x10, x8, eq +; CHECK-GI-NEXT: orr x10, x16, x11 +; CHECK-GI-NEXT: cmp x14, #64 +; CHECK-GI-NEXT: lsr x11, x3, x14 +; CHECK-GI-NEXT: csel x10, x10, x12, lo ; CHECK-GI-NEXT: tst x4, #0x7f -; CHECK-GI-NEXT: csel x9, x2, x9, eq -; CHECK-GI-NEXT: cmp x12, #64 -; CHECK-GI-NEXT: csel x10, x10, xzr, lo -; CHECK-GI-NEXT: orr x0, x11, x9 -; CHECK-GI-NEXT: orr x1, x8, x10 +; CHECK-GI-NEXT: csel x10, x2, x10, eq +; CHECK-GI-NEXT: cmp x14, #64 +; CHECK-GI-NEXT: csel x11, x11, xzr, lo +; CHECK-GI-NEXT: orr x0, x9, x10 +; CHECK-GI-NEXT: orr x1, x8, x11 ; CHECK-GI-NEXT: ret entry: %d = call i128 
@llvm.fshr(i128 %a, i128 %b, i128 %c) @@ -720,10 +718,9 @@ define i128 @rotl_i128_c(i128 %a) { ; ; CHECK-GI-LABEL: rotl_i128_c: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: lsr x8, x0, #61 -; CHECK-GI-NEXT: lsr x9, x1, #61 -; CHECK-GI-NEXT: orr x1, x8, x1, lsl #3 -; CHECK-GI-NEXT: orr x0, x9, x0, lsl #3 +; CHECK-GI-NEXT: extr x8, x1, x0, #61 +; CHECK-GI-NEXT: extr x0, x0, x1, #61 +; CHECK-GI-NEXT: mov x1, x8 ; CHECK-GI-NEXT: ret entry: %d = call i128 @llvm.fshl(i128 %a, i128 %a, i128 3) @@ -731,20 +728,12 @@ entry: } define i128 @rotr_i128_c(i128 %a) { -; CHECK-SD-LABEL: rotr_i128_c: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: extr x8, x1, x0, #3 -; CHECK-SD-NEXT: extr x1, x0, x1, #3 -; CHECK-SD-NEXT: mov x0, x8 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: rotr_i128_c: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: lsl x8, x1, #61 -; CHECK-GI-NEXT: lsl x9, x0, #61 -; CHECK-GI-NEXT: orr x0, x8, x0, lsr #3 -; CHECK-GI-NEXT: orr x1, x9, x1, lsr #3 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: rotr_i128_c: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: extr x8, x1, x0, #3 +; CHECK-NEXT: extr x1, x0, x1, #3 +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret entry: %d = call i128 @llvm.fshr(i128 %a, i128 %a, i128 3) ret i128 %d @@ -868,10 +857,8 @@ define i128 @fshl_i128_c(i128 %a, i128 %b) { ; ; CHECK-GI-LABEL: fshl_i128_c: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: lsr x8, x0, #61 -; CHECK-GI-NEXT: lsr x9, x3, #61 -; CHECK-GI-NEXT: orr x1, x8, x1, lsl #3 -; CHECK-GI-NEXT: orr x0, x9, x0, lsl #3 +; CHECK-GI-NEXT: extr x1, x1, x0, #61 +; CHECK-GI-NEXT: extr x0, x0, x3, #61 ; CHECK-GI-NEXT: ret entry: %d = call i128 @llvm.fshl(i128 %a, i128 %b, i128 3) @@ -879,21 +866,12 @@ entry: } define i128 @fshr_i128_c(i128 %a, i128 %b) { -; CHECK-SD-LABEL: fshr_i128_c: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: extr x8, x3, x2, #3 -; CHECK-SD-NEXT: extr x1, x0, x3, #3 -; CHECK-SD-NEXT: mov x0, x8 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fshr_i128_c: -; CHECK-GI: // 
%bb.0: // %entry -; CHECK-GI-NEXT: lsl x8, x3, #61 -; CHECK-GI-NEXT: lsr x9, x3, #3 -; CHECK-GI-NEXT: orr x8, x8, x2, lsr #3 -; CHECK-GI-NEXT: orr x1, x9, x0, lsl #61 -; CHECK-GI-NEXT: mov x0, x8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fshr_i128_c: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: extr x8, x3, x2, #3 +; CHECK-NEXT: extr x1, x0, x3, #3 +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret entry: %d = call i128 @llvm.fshr(i128 %a, i128 %b, i128 3) ret i128 %d @@ -3012,75 +2990,73 @@ define <2 x i128> @fshl_v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) { ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 ; CHECK-GI-NEXT: .cfi_offset w19, -16 ; CHECK-GI-NEXT: ldr x11, [sp, #16] -; CHECK-GI-NEXT: mov w10, #64 // =0x40 +; CHECK-GI-NEXT: mov w9, #64 // =0x40 ; CHECK-GI-NEXT: ldr x12, [sp, #32] ; CHECK-GI-NEXT: mov w13, #127 // =0x7f -; CHECK-GI-NEXT: and x9, x11, #0x7f +; CHECK-GI-NEXT: and x8, x11, #0x7f ; CHECK-GI-NEXT: and x14, x12, #0x7f -; CHECK-GI-NEXT: mvn x15, x11 -; CHECK-GI-NEXT: sub x8, x10, x9 -; CHECK-GI-NEXT: sub x16, x9, #64 -; CHECK-GI-NEXT: lsl x19, x1, x9 -; CHECK-GI-NEXT: lsr x18, x0, x8 -; CHECK-GI-NEXT: lsl x17, x0, x9 -; CHECK-GI-NEXT: lsl x16, x0, x16 -; CHECK-GI-NEXT: cmp x9, #64 -; CHECK-GI-NEXT: bic x0, x13, x11 -; CHECK-GI-NEXT: mvn x8, x12 -; CHECK-GI-NEXT: orr x18, x18, x19 -; CHECK-GI-NEXT: csel x9, x17, xzr, lo +; CHECK-GI-NEXT: mvn x18, x11 +; CHECK-GI-NEXT: sub x10, x9, x8 +; CHECK-GI-NEXT: sub x15, x8, #64 +; CHECK-GI-NEXT: lsl x17, x1, x8 +; CHECK-GI-NEXT: lsr x16, x0, x10 +; CHECK-GI-NEXT: lsl x15, x0, x15 +; CHECK-GI-NEXT: cmp x8, #64 +; CHECK-GI-NEXT: lsl x19, x0, x8 +; CHECK-GI-NEXT: lsl x0, x3, x14 +; CHECK-GI-NEXT: mvn x10, x12 +; CHECK-GI-NEXT: orr x16, x16, x17 ; CHECK-GI-NEXT: sub x17, x14, #64 -; CHECK-GI-NEXT: csel x16, x18, x16, lo +; CHECK-GI-NEXT: csel x15, x16, x15, lo +; CHECK-GI-NEXT: sub x16, x9, x14 +; CHECK-GI-NEXT: csel x8, x19, xzr, lo +; CHECK-GI-NEXT: lsr x16, x2, x16 ; CHECK-GI-NEXT: tst x11, #0x7f -; CHECK-GI-NEXT: 
sub x11, x10, x14 -; CHECK-GI-NEXT: lsr x11, x2, x11 -; CHECK-GI-NEXT: lsl x18, x3, x14 -; CHECK-GI-NEXT: csel x16, x1, x16, eq -; CHECK-GI-NEXT: lsl x1, x2, x14 +; CHECK-GI-NEXT: lsl x19, x2, x14 ; CHECK-GI-NEXT: lsl x17, x2, x17 +; CHECK-GI-NEXT: csel x15, x1, x15, eq ; CHECK-GI-NEXT: cmp x14, #64 -; CHECK-GI-NEXT: lsl x14, x5, #63 -; CHECK-GI-NEXT: orr x11, x11, x18 -; CHECK-GI-NEXT: bic x13, x13, x12 -; CHECK-GI-NEXT: csel x18, x1, xzr, lo -; CHECK-GI-NEXT: csel x11, x11, x17, lo +; CHECK-GI-NEXT: orr x16, x16, x0 +; CHECK-GI-NEXT: bic x11, x13, x11 +; CHECK-GI-NEXT: csel x14, x19, xzr, lo +; CHECK-GI-NEXT: csel x16, x16, x17, lo ; CHECK-GI-NEXT: tst x12, #0x7f -; CHECK-GI-NEXT: lsr x12, x5, #1 -; CHECK-GI-NEXT: orr x14, x14, x4, lsr #1 -; CHECK-GI-NEXT: lsl x17, x7, #63 -; CHECK-GI-NEXT: sub x1, x10, x0 -; CHECK-GI-NEXT: csel x11, x3, x11, eq -; CHECK-GI-NEXT: sub x2, x0, #64 -; CHECK-GI-NEXT: lsr x3, x14, x0 -; CHECK-GI-NEXT: lsl x1, x12, x1 -; CHECK-GI-NEXT: lsr x4, x7, #1 -; CHECK-GI-NEXT: orr x17, x17, x6, lsr #1 -; CHECK-GI-NEXT: lsr x2, x12, x2 -; CHECK-GI-NEXT: cmp x0, #64 -; CHECK-GI-NEXT: orr x1, x3, x1 -; CHECK-GI-NEXT: sub x10, x10, x13 -; CHECK-GI-NEXT: lsr x12, x12, x0 -; CHECK-GI-NEXT: csel x1, x1, x2, lo -; CHECK-GI-NEXT: tst x15, #0x7f -; CHECK-GI-NEXT: sub x15, x13, #64 -; CHECK-GI-NEXT: lsr x2, x17, x13 -; CHECK-GI-NEXT: lsl x10, x4, x10 -; CHECK-GI-NEXT: csel x14, x14, x1, eq -; CHECK-GI-NEXT: cmp x0, #64 -; CHECK-GI-NEXT: lsr x15, x4, x15 -; CHECK-GI-NEXT: lsr x0, x4, x13 -; CHECK-GI-NEXT: csel x12, x12, xzr, lo -; CHECK-GI-NEXT: orr x10, x2, x10 -; CHECK-GI-NEXT: cmp x13, #64 -; CHECK-GI-NEXT: csel x10, x10, x15, lo -; CHECK-GI-NEXT: tst x8, #0x7f -; CHECK-GI-NEXT: orr x1, x16, x12 -; CHECK-GI-NEXT: csel x8, x17, x10, eq -; CHECK-GI-NEXT: cmp x13, #64 -; CHECK-GI-NEXT: csel x10, x0, xzr, lo -; CHECK-GI-NEXT: orr x0, x9, x14 -; CHECK-GI-NEXT: orr x2, x18, x8 -; CHECK-GI-NEXT: orr x3, x11, x10 +; CHECK-GI-NEXT: lsr x17, x5, #1 +; 
CHECK-GI-NEXT: extr x0, x5, x4, #1 +; CHECK-GI-NEXT: bic x12, x13, x12 +; CHECK-GI-NEXT: csel x13, x3, x16, eq +; CHECK-GI-NEXT: sub x16, x9, x11 +; CHECK-GI-NEXT: sub x1, x11, #64 +; CHECK-GI-NEXT: lsr x3, x7, #1 +; CHECK-GI-NEXT: lsr x2, x0, x11 +; CHECK-GI-NEXT: lsl x16, x17, x16 +; CHECK-GI-NEXT: extr x4, x7, x6, #1 +; CHECK-GI-NEXT: lsr x1, x17, x1 +; CHECK-GI-NEXT: cmp x11, #64 +; CHECK-GI-NEXT: sub x9, x9, x12 +; CHECK-GI-NEXT: orr x16, x2, x16 +; CHECK-GI-NEXT: lsr x17, x17, x11 +; CHECK-GI-NEXT: lsl x9, x3, x9 +; CHECK-GI-NEXT: csel x16, x16, x1, lo +; CHECK-GI-NEXT: tst x18, #0x7f +; CHECK-GI-NEXT: sub x18, x12, #64 +; CHECK-GI-NEXT: lsr x1, x4, x12 +; CHECK-GI-NEXT: csel x16, x0, x16, eq +; CHECK-GI-NEXT: cmp x11, #64 +; CHECK-GI-NEXT: lsr x11, x3, x18 +; CHECK-GI-NEXT: csel x17, x17, xzr, lo +; CHECK-GI-NEXT: cmp x12, #64 +; CHECK-GI-NEXT: orr x9, x1, x9 +; CHECK-GI-NEXT: lsr x18, x3, x12 +; CHECK-GI-NEXT: orr x0, x8, x16 +; CHECK-GI-NEXT: csel x9, x9, x11, lo +; CHECK-GI-NEXT: tst x10, #0x7f +; CHECK-GI-NEXT: orr x1, x15, x17 +; CHECK-GI-NEXT: csel x9, x4, x9, eq +; CHECK-GI-NEXT: cmp x12, #64 +; CHECK-GI-NEXT: csel x10, x18, xzr, lo +; CHECK-GI-NEXT: orr x2, x14, x9 +; CHECK-GI-NEXT: orr x3, x13, x10 ; CHECK-GI-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-GI-NEXT: ret entry: @@ -3124,75 +3100,73 @@ define <2 x i128> @fshr_v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) { ; CHECK-GI-LABEL: fshr_v2i128: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ldr x9, [sp] -; CHECK-GI-NEXT: lsl x12, x1, #1 -; CHECK-GI-NEXT: mov w11, #127 // =0x7f -; CHECK-GI-NEXT: mov w14, #64 // =0x40 -; CHECK-GI-NEXT: lsl x15, x0, #1 +; CHECK-GI-NEXT: mov w10, #127 // =0x7f +; CHECK-GI-NEXT: mov w12, #64 // =0x40 +; CHECK-GI-NEXT: lsl x13, x0, #1 +; CHECK-GI-NEXT: extr x14, x1, x0, #63 ; CHECK-GI-NEXT: ldr x8, [sp, #16] -; CHECK-GI-NEXT: bic x13, x11, x9 -; CHECK-GI-NEXT: orr x12, x12, x0, lsr #63 -; CHECK-GI-NEXT: lsl x1, x3, #1 -; CHECK-GI-NEXT: sub x17, x14, 
x13 -; CHECK-GI-NEXT: sub x18, x13, #64 -; CHECK-GI-NEXT: lsl x3, x15, x13 -; CHECK-GI-NEXT: lsr x17, x15, x17 -; CHECK-GI-NEXT: lsl x0, x12, x13 -; CHECK-GI-NEXT: lsl x15, x15, x18 -; CHECK-GI-NEXT: bic x11, x11, x8 +; CHECK-GI-NEXT: bic x11, x10, x9 +; CHECK-GI-NEXT: mvn x16, x9 +; CHECK-GI-NEXT: and x15, x9, #0x7f +; CHECK-GI-NEXT: sub x17, x12, x11 +; CHECK-GI-NEXT: sub x18, x11, #64 +; CHECK-GI-NEXT: lsl x0, x14, x11 +; CHECK-GI-NEXT: lsr x17, x13, x17 +; CHECK-GI-NEXT: lsl x1, x13, x11 +; CHECK-GI-NEXT: lsl x13, x13, x18 +; CHECK-GI-NEXT: bic x10, x10, x8 ; CHECK-GI-NEXT: lsl x18, x2, #1 -; CHECK-GI-NEXT: cmp x13, #64 +; CHECK-GI-NEXT: cmp x11, #64 ; CHECK-GI-NEXT: orr x17, x17, x0 -; CHECK-GI-NEXT: orr x13, x1, x2, lsr #63 -; CHECK-GI-NEXT: mvn x16, x9 -; CHECK-GI-NEXT: csel x15, x17, x15, lo -; CHECK-GI-NEXT: sub x17, x14, x11 -; CHECK-GI-NEXT: csel x0, x3, xzr, lo +; CHECK-GI-NEXT: extr x11, x3, x2, #63 +; CHECK-GI-NEXT: csel x0, x1, xzr, lo +; CHECK-GI-NEXT: csel x13, x17, x13, lo +; CHECK-GI-NEXT: sub x17, x12, x10 ; CHECK-GI-NEXT: tst x16, #0x7f -; CHECK-GI-NEXT: sub x16, x11, #64 +; CHECK-GI-NEXT: sub x16, x10, #64 ; CHECK-GI-NEXT: lsr x17, x18, x17 -; CHECK-GI-NEXT: lsl x2, x13, x11 -; CHECK-GI-NEXT: lsl x1, x18, x11 -; CHECK-GI-NEXT: csel x12, x12, x15, eq -; CHECK-GI-NEXT: lsl x15, x18, x16 -; CHECK-GI-NEXT: and x10, x9, #0x7f -; CHECK-GI-NEXT: cmp x11, #64 -; CHECK-GI-NEXT: mvn x11, x8 +; CHECK-GI-NEXT: lsl x2, x11, x10 +; CHECK-GI-NEXT: lsl x1, x18, x10 +; CHECK-GI-NEXT: csel x13, x14, x13, eq +; CHECK-GI-NEXT: lsl x14, x18, x16 +; CHECK-GI-NEXT: cmp x10, #64 +; CHECK-GI-NEXT: mvn x10, x8 ; CHECK-GI-NEXT: orr x16, x17, x2 ; CHECK-GI-NEXT: csel x17, x1, xzr, lo -; CHECK-GI-NEXT: csel x15, x16, x15, lo -; CHECK-GI-NEXT: tst x11, #0x7f -; CHECK-GI-NEXT: sub x11, x14, x10 -; CHECK-GI-NEXT: sub x16, x10, #64 -; CHECK-GI-NEXT: lsr x18, x4, x10 -; CHECK-GI-NEXT: lsl x11, x5, x11 -; CHECK-GI-NEXT: csel x13, x13, x15, eq -; CHECK-GI-NEXT: lsr x15, x5, x16 
+; CHECK-GI-NEXT: csel x14, x16, x14, lo +; CHECK-GI-NEXT: tst x10, #0x7f +; CHECK-GI-NEXT: sub x10, x12, x15 +; CHECK-GI-NEXT: sub x16, x15, #64 +; CHECK-GI-NEXT: lsr x18, x4, x15 +; CHECK-GI-NEXT: lsl x10, x5, x10 +; CHECK-GI-NEXT: csel x11, x11, x14, eq +; CHECK-GI-NEXT: lsr x14, x5, x16 ; CHECK-GI-NEXT: and x1, x8, #0x7f -; CHECK-GI-NEXT: orr x11, x18, x11 -; CHECK-GI-NEXT: cmp x10, #64 -; CHECK-GI-NEXT: lsr x16, x5, x10 -; CHECK-GI-NEXT: csel x11, x11, x15, lo +; CHECK-GI-NEXT: cmp x15, #64 +; CHECK-GI-NEXT: lsr x16, x5, x15 +; CHECK-GI-NEXT: orr x10, x18, x10 +; CHECK-GI-NEXT: csel x10, x10, x14, lo ; CHECK-GI-NEXT: tst x9, #0x7f -; CHECK-GI-NEXT: sub x9, x14, x1 -; CHECK-GI-NEXT: sub x14, x1, #64 -; CHECK-GI-NEXT: lsr x15, x6, x1 +; CHECK-GI-NEXT: sub x9, x12, x1 +; CHECK-GI-NEXT: sub x12, x1, #64 +; CHECK-GI-NEXT: lsr x14, x6, x1 ; CHECK-GI-NEXT: lsl x9, x7, x9 -; CHECK-GI-NEXT: csel x11, x4, x11, eq -; CHECK-GI-NEXT: cmp x10, #64 -; CHECK-GI-NEXT: lsr x10, x7, x14 -; CHECK-GI-NEXT: csel x14, x16, xzr, lo -; CHECK-GI-NEXT: orr x9, x15, x9 +; CHECK-GI-NEXT: csel x10, x4, x10, eq +; CHECK-GI-NEXT: cmp x15, #64 +; CHECK-GI-NEXT: lsr x12, x7, x12 +; CHECK-GI-NEXT: csel x15, x16, xzr, lo +; CHECK-GI-NEXT: orr x9, x14, x9 ; CHECK-GI-NEXT: cmp x1, #64 -; CHECK-GI-NEXT: lsr x15, x7, x1 -; CHECK-GI-NEXT: csel x9, x9, x10, lo +; CHECK-GI-NEXT: lsr x14, x7, x1 +; CHECK-GI-NEXT: csel x9, x9, x12, lo ; CHECK-GI-NEXT: tst x8, #0x7f ; CHECK-GI-NEXT: csel x8, x6, x9, eq ; CHECK-GI-NEXT: cmp x1, #64 -; CHECK-GI-NEXT: orr x0, x0, x11 -; CHECK-GI-NEXT: csel x9, x15, xzr, lo -; CHECK-GI-NEXT: orr x1, x12, x14 +; CHECK-GI-NEXT: orr x0, x0, x10 +; CHECK-GI-NEXT: csel x9, x14, xzr, lo +; CHECK-GI-NEXT: orr x1, x13, x15 ; CHECK-GI-NEXT: orr x2, x17, x8 -; CHECK-GI-NEXT: orr x3, x13, x9 +; CHECK-GI-NEXT: orr x3, x11, x9 ; CHECK-GI-NEXT: ret entry: %d = call <2 x i128> @llvm.fshr(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) @@ -3862,15 +3836,12 @@ define <2 x i128> @rotl_v2i128_c(<2 
x i128> %a) { ; ; CHECK-GI-LABEL: rotl_v2i128_c: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: lsr x8, x1, #61 -; CHECK-GI-NEXT: lsl x9, x1, #3 -; CHECK-GI-NEXT: lsl x10, x3, #3 -; CHECK-GI-NEXT: lsr x11, x3, #61 -; CHECK-GI-NEXT: orr x8, x8, x0, lsl #3 -; CHECK-GI-NEXT: orr x1, x9, x0, lsr #61 -; CHECK-GI-NEXT: orr x3, x10, x2, lsr #61 -; CHECK-GI-NEXT: orr x2, x11, x2, lsl #3 +; CHECK-GI-NEXT: extr x8, x0, x1, #61 +; CHECK-GI-NEXT: extr x9, x3, x2, #61 +; CHECK-GI-NEXT: extr x1, x1, x0, #61 +; CHECK-GI-NEXT: extr x2, x2, x3, #61 ; CHECK-GI-NEXT: mov x0, x8 +; CHECK-GI-NEXT: mov x3, x9 ; CHECK-GI-NEXT: ret entry: %d = call <2 x i128> @llvm.fshl(<2 x i128> %a, <2 x i128> %a, <2 x i128> ) @@ -3890,14 +3861,12 @@ define <2 x i128> @rotr_v2i128_c(<2 x i128> %a) { ; ; CHECK-GI-LABEL: rotr_v2i128_c: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: lsl x8, x1, #61 -; CHECK-GI-NEXT: lsl x9, x3, #61 -; CHECK-GI-NEXT: lsl x10, x0, #61 -; CHECK-GI-NEXT: lsl x11, x2, #61 -; CHECK-GI-NEXT: orr x0, x8, x0, lsr #3 -; CHECK-GI-NEXT: orr x2, x9, x2, lsr #3 -; CHECK-GI-NEXT: orr x1, x10, x1, lsr #3 -; CHECK-GI-NEXT: orr x3, x11, x3, lsr #3 +; CHECK-GI-NEXT: extr x8, x1, x0, #3 +; CHECK-GI-NEXT: extr x9, x3, x2, #3 +; CHECK-GI-NEXT: extr x1, x0, x1, #3 +; CHECK-GI-NEXT: extr x3, x2, x3, #3 +; CHECK-GI-NEXT: mov x0, x8 +; CHECK-GI-NEXT: mov x2, x9 ; CHECK-GI-NEXT: ret entry: %d = call <2 x i128> @llvm.fshr(<2 x i128> %a, <2 x i128> %a, <2 x i128> ) @@ -4463,14 +4432,10 @@ define <2 x i128> @fshl_v2i128_c(<2 x i128> %a, <2 x i128> %b) { ; ; CHECK-GI-LABEL: fshl_v2i128_c: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: lsr x8, x5, #61 -; CHECK-GI-NEXT: lsl x9, x1, #3 -; CHECK-GI-NEXT: lsl x10, x3, #3 -; CHECK-GI-NEXT: lsr x11, x7, #61 -; CHECK-GI-NEXT: orr x8, x8, x0, lsl #3 -; CHECK-GI-NEXT: orr x1, x9, x0, lsr #61 -; CHECK-GI-NEXT: orr x3, x10, x2, lsr #61 -; CHECK-GI-NEXT: orr x2, x11, x2, lsl #3 +; CHECK-GI-NEXT: extr x8, x0, x5, #61 +; CHECK-GI-NEXT: extr x1, x1, x0, #61 +; 
CHECK-GI-NEXT: extr x3, x3, x2, #61 +; CHECK-GI-NEXT: extr x2, x2, x7, #61 ; CHECK-GI-NEXT: mov x0, x8 ; CHECK-GI-NEXT: ret entry: @@ -4479,29 +4444,15 @@ entry: } define <2 x i128> @fshr_v2i128_c(<2 x i128> %a, <2 x i128> %b) { -; CHECK-SD-LABEL: fshr_v2i128_c: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: extr x8, x5, x4, #3 -; CHECK-SD-NEXT: extr x9, x7, x6, #3 -; CHECK-SD-NEXT: extr x1, x0, x5, #3 -; CHECK-SD-NEXT: extr x3, x2, x7, #3 -; CHECK-SD-NEXT: mov x0, x8 -; CHECK-SD-NEXT: mov x2, x9 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fshr_v2i128_c: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: lsl x8, x5, #61 -; CHECK-GI-NEXT: lsl x9, x7, #61 -; CHECK-GI-NEXT: lsr x10, x5, #3 -; CHECK-GI-NEXT: lsr x11, x7, #3 -; CHECK-GI-NEXT: orr x8, x8, x4, lsr #3 -; CHECK-GI-NEXT: orr x9, x9, x6, lsr #3 -; CHECK-GI-NEXT: orr x1, x10, x0, lsl #61 -; CHECK-GI-NEXT: orr x3, x11, x2, lsl #61 -; CHECK-GI-NEXT: mov x0, x8 -; CHECK-GI-NEXT: mov x2, x9 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fshr_v2i128_c: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: extr x8, x5, x4, #3 +; CHECK-NEXT: extr x9, x7, x6, #3 +; CHECK-NEXT: extr x1, x0, x5, #3 +; CHECK-NEXT: extr x3, x2, x7, #3 +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: mov x2, x9 +; CHECK-NEXT: ret entry: %d = call <2 x i128> @llvm.fshr(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) ret <2 x i128> %d diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll index e5aa360f804c1..5ccdccc9bc1f5 100644 --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -85,41 +85,40 @@ define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind { ; ; CHECK-GI-LABEL: fshl_i128: ; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #64 // =0x40 ; CHECK-GI-NEXT: and x9, x4, #0x7f -; CHECK-GI-NEXT: mov w10, #64 // =0x40 -; CHECK-GI-NEXT: lsl x14, x3, #63 -; CHECK-GI-NEXT: sub x12, x10, x9 +; CHECK-GI-NEXT: mov w10, #127 // =0x7f +; CHECK-GI-NEXT: sub x12, x8, x9 ; 
CHECK-GI-NEXT: lsl x13, x1, x9 -; CHECK-GI-NEXT: mov w8, #127 // =0x7f +; CHECK-GI-NEXT: bic x10, x10, x4 ; CHECK-GI-NEXT: lsr x12, x0, x12 -; CHECK-GI-NEXT: bic x8, x8, x4 -; CHECK-GI-NEXT: sub x15, x9, #64 +; CHECK-GI-NEXT: sub x14, x9, #64 +; CHECK-GI-NEXT: lsl x15, x0, x9 +; CHECK-GI-NEXT: extr x16, x3, x2, #1 ; CHECK-GI-NEXT: cmp x9, #64 -; CHECK-GI-NEXT: lsl x9, x0, x9 -; CHECK-GI-NEXT: lsl x15, x0, x15 -; CHECK-GI-NEXT: orr x12, x12, x13 -; CHECK-GI-NEXT: orr x13, x14, x2, lsr #1 -; CHECK-GI-NEXT: lsr x14, x3, #1 -; CHECK-GI-NEXT: sub x10, x10, x8 -; CHECK-GI-NEXT: sub x16, x8, #64 -; CHECK-GI-NEXT: csel x9, x9, xzr, lo -; CHECK-GI-NEXT: lsr x17, x13, x8 -; CHECK-GI-NEXT: lsl x10, x14, x10 -; CHECK-GI-NEXT: csel x12, x12, x15, lo +; CHECK-GI-NEXT: sub x8, x8, x10 +; CHECK-GI-NEXT: orr x9, x12, x13 +; CHECK-GI-NEXT: lsr x12, x3, #1 +; CHECK-GI-NEXT: lsl x13, x0, x14 +; CHECK-GI-NEXT: csel x14, x15, xzr, lo +; CHECK-GI-NEXT: sub x15, x10, #64 +; CHECK-GI-NEXT: lsr x17, x16, x10 +; CHECK-GI-NEXT: lsl x8, x12, x8 +; CHECK-GI-NEXT: csel x9, x9, x13, lo ; CHECK-GI-NEXT: tst x4, #0x7f -; CHECK-GI-NEXT: lsr x15, x14, x16 +; CHECK-GI-NEXT: lsr x13, x12, x15 ; CHECK-GI-NEXT: mvn x11, x4 -; CHECK-GI-NEXT: csel x12, x1, x12, eq -; CHECK-GI-NEXT: orr x10, x17, x10 -; CHECK-GI-NEXT: cmp x8, #64 -; CHECK-GI-NEXT: lsr x14, x14, x8 -; CHECK-GI-NEXT: csel x10, x10, x15, lo +; CHECK-GI-NEXT: csel x9, x1, x9, eq +; CHECK-GI-NEXT: orr x8, x17, x8 +; CHECK-GI-NEXT: cmp x10, #64 +; CHECK-GI-NEXT: lsr x12, x12, x10 +; CHECK-GI-NEXT: csel x8, x8, x13, lo ; CHECK-GI-NEXT: tst x11, #0x7f -; CHECK-GI-NEXT: csel x10, x13, x10, eq -; CHECK-GI-NEXT: cmp x8, #64 -; CHECK-GI-NEXT: csel x8, x14, xzr, lo -; CHECK-GI-NEXT: orr x0, x9, x10 -; CHECK-GI-NEXT: orr x1, x12, x8 +; CHECK-GI-NEXT: csel x8, x16, x8, eq +; CHECK-GI-NEXT: cmp x10, #64 +; CHECK-GI-NEXT: csel x10, x12, xzr, lo +; CHECK-GI-NEXT: orr x0, x14, x8 +; CHECK-GI-NEXT: orr x1, x9, x10 ; CHECK-GI-NEXT: ret %f = call i128 
@llvm.fshl.i128(i128 %x, i128 %y, i128 %z) ret i128 %f diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll index c57383ad9b1e7..247902ee26b67 100644 --- a/llvm/test/CodeGen/AArch64/rem-by-const.ll +++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll @@ -570,20 +570,18 @@ define i128 @ui128_7(i128 %a, i128 %b) { ; CHECK-GI-NEXT: add x8, x8, x10 ; CHECK-GI-NEXT: subs x10, x0, x9 ; CHECK-GI-NEXT: sbc x11, x1, x8 -; CHECK-GI-NEXT: lsl x12, x11, #63 +; CHECK-GI-NEXT: extr x10, x11, x10, #1 ; CHECK-GI-NEXT: lsr x11, x11, #1 -; CHECK-GI-NEXT: orr x10, x12, x10, lsr #1 ; CHECK-GI-NEXT: adds x9, x10, x9 +; CHECK-GI-NEXT: mov w10, #7 // =0x7 ; CHECK-GI-NEXT: adc x8, x11, x8 -; CHECK-GI-NEXT: lsl x10, x8, #62 +; CHECK-GI-NEXT: extr x9, x8, x9, #2 ; CHECK-GI-NEXT: lsr x8, x8, #2 -; CHECK-GI-NEXT: orr x9, x10, x9, lsr #2 -; CHECK-GI-NEXT: mov w10, #7 // =0x7 -; CHECK-GI-NEXT: lsl x12, x8, #3 ; CHECK-GI-NEXT: umulh x10, x9, x10 ; CHECK-GI-NEXT: lsl x11, x9, #3 -; CHECK-GI-NEXT: sub x8, x12, x8 +; CHECK-GI-NEXT: lsl x12, x8, #3 ; CHECK-GI-NEXT: sub x9, x11, x9 +; CHECK-GI-NEXT: sub x8, x12, x8 ; CHECK-GI-NEXT: subs x0, x0, x9 ; CHECK-GI-NEXT: add x8, x8, x10 ; CHECK-GI-NEXT: sbc x1, x1, x8 @@ -651,10 +649,9 @@ define i128 @ui128_100(i128 %a, i128 %b) { ; CHECK-GI-NEXT: add x10, x11, x12 ; CHECK-GI-NEXT: add x8, x8, x14 ; CHECK-GI-NEXT: add x8, x8, x10 -; CHECK-GI-NEXT: lsl x10, x8, #60 -; CHECK-GI-NEXT: lsr x8, x8, #4 -; CHECK-GI-NEXT: orr x9, x10, x9, lsr #4 ; CHECK-GI-NEXT: mov w10, #100 // =0x64 +; CHECK-GI-NEXT: extr x9, x8, x9, #4 +; CHECK-GI-NEXT: lsr x8, x8, #4 ; CHECK-GI-NEXT: umulh x11, x9, x10 ; CHECK-GI-NEXT: mul x9, x9, x10 ; CHECK-GI-NEXT: madd x8, x8, x10, x11 @@ -3318,36 +3315,32 @@ define <2 x i128> @uv2i128_7(<2 x i128> %d, <2 x i128> %e) { ; CHECK-GI-NEXT: sbc x14, x1, x12 ; CHECK-GI-NEXT: add x8, x8, x13 ; CHECK-GI-NEXT: subs x13, x2, x10 -; CHECK-GI-NEXT: lsl x15, x14, #63 -; CHECK-GI-NEXT: sbc x16, x3, x8 +; 
CHECK-GI-NEXT: extr x9, x14, x9, #1 +; CHECK-GI-NEXT: sbc x15, x3, x8 ; CHECK-GI-NEXT: lsr x14, x14, #1 -; CHECK-GI-NEXT: orr x9, x15, x9, lsr #1 -; CHECK-GI-NEXT: lsl x15, x16, #63 -; CHECK-GI-NEXT: orr x13, x15, x13, lsr #1 +; CHECK-GI-NEXT: extr x13, x15, x13, #1 ; CHECK-GI-NEXT: adds x9, x9, x11 -; CHECK-GI-NEXT: lsr x11, x16, #1 +; CHECK-GI-NEXT: lsr x11, x15, #1 ; CHECK-GI-NEXT: adc x12, x14, x12 ; CHECK-GI-NEXT: adds x10, x13, x10 -; CHECK-GI-NEXT: lsl x13, x12, #62 -; CHECK-GI-NEXT: lsr x12, x12, #2 -; CHECK-GI-NEXT: adc x8, x11, x8 -; CHECK-GI-NEXT: lsl x11, x8, #62 -; CHECK-GI-NEXT: orr x9, x13, x9, lsr #2 +; CHECK-GI-NEXT: extr x9, x12, x9, #2 ; CHECK-GI-NEXT: mov w13, #7 // =0x7 +; CHECK-GI-NEXT: adc x8, x11, x8 +; CHECK-GI-NEXT: lsr x11, x12, #2 +; CHECK-GI-NEXT: extr x10, x8, x10, #2 +; CHECK-GI-NEXT: umulh x12, x9, x13 ; CHECK-GI-NEXT: lsr x8, x8, #2 -; CHECK-GI-NEXT: lsl x14, x12, #3 -; CHECK-GI-NEXT: orr x10, x11, x10, lsr #2 -; CHECK-GI-NEXT: umulh x11, x9, x13 +; CHECK-GI-NEXT: lsl x14, x11, #3 ; CHECK-GI-NEXT: lsl x15, x9, #3 -; CHECK-GI-NEXT: sub x12, x14, x12 -; CHECK-GI-NEXT: lsl x16, x8, #3 ; CHECK-GI-NEXT: umulh x13, x10, x13 +; CHECK-GI-NEXT: lsl x16, x8, #3 +; CHECK-GI-NEXT: sub x11, x14, x11 ; CHECK-GI-NEXT: lsl x14, x10, #3 ; CHECK-GI-NEXT: sub x9, x15, x9 ; CHECK-GI-NEXT: sub x8, x16, x8 ; CHECK-GI-NEXT: subs x0, x0, x9 +; CHECK-GI-NEXT: add x11, x11, x12 ; CHECK-GI-NEXT: sub x10, x14, x10 -; CHECK-GI-NEXT: add x11, x12, x11 ; CHECK-GI-NEXT: sbc x1, x1, x11 ; CHECK-GI-NEXT: subs x2, x2, x10 ; CHECK-GI-NEXT: add x8, x8, x13 @@ -3395,9 +3388,10 @@ define <2 x i128> @uv2i128_100(<2 x i128> %d, <2 x i128> %e) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov x10, #23593 // =0x5c29 ; CHECK-GI-NEXT: mov x8, #62914 // =0xf5c2 -; CHECK-GI-NEXT: sub x18, x0, x0 +; CHECK-GI-NEXT: and x5, xzr, #0x1 ; CHECK-GI-NEXT: movk x10, #49807, lsl #16 ; CHECK-GI-NEXT: movk x8, #23592, lsl #16 +; CHECK-GI-NEXT: umulh x18, x0, xzr ; CHECK-GI-NEXT: movk 
x10, #10485, lsl #32 ; CHECK-GI-NEXT: movk x8, #49807, lsl #32 ; CHECK-GI-NEXT: movk x10, #36700, lsl #48 @@ -3410,84 +3404,81 @@ define <2 x i128> @uv2i128_100(<2 x i128> %d, <2 x i128> %e) { ; CHECK-GI-NEXT: umulh x15, x1, x10 ; CHECK-GI-NEXT: cset w12, hs ; CHECK-GI-NEXT: cmn x11, x13 -; CHECK-GI-NEXT: and x11, x12, #0x1 -; CHECK-GI-NEXT: umulh x16, x0, x8 -; CHECK-GI-NEXT: cset w12, hs +; CHECK-GI-NEXT: sub x13, x0, x0 ; CHECK-GI-NEXT: and x12, x12, #0x1 -; CHECK-GI-NEXT: add x14, x14, x18 -; CHECK-GI-NEXT: add x11, x11, x12 -; CHECK-GI-NEXT: and x12, xzr, #0x1 +; CHECK-GI-NEXT: umulh x16, x0, x8 +; CHECK-GI-NEXT: cset w11, hs +; CHECK-GI-NEXT: add x13, x14, x13 +; CHECK-GI-NEXT: and x11, x11, #0x1 +; CHECK-GI-NEXT: and x14, xzr, #0x1 ; CHECK-GI-NEXT: umulh x9, xzr, x10 -; CHECK-GI-NEXT: adds x14, x14, x15 -; CHECK-GI-NEXT: and x15, xzr, #0x1 +; CHECK-GI-NEXT: add x11, x12, x11 +; CHECK-GI-NEXT: add x12, x5, x14 +; CHECK-GI-NEXT: adds x13, x13, x15 ; CHECK-GI-NEXT: umulh x17, x1, x8 -; CHECK-GI-NEXT: cset w4, hs -; CHECK-GI-NEXT: add x15, x12, x15 -; CHECK-GI-NEXT: adds x12, x14, x16 -; CHECK-GI-NEXT: and x4, x4, #0x1 -; CHECK-GI-NEXT: mul x18, x3, x10 ; CHECK-GI-NEXT: cset w14, hs -; CHECK-GI-NEXT: adds x12, x12, x11 -; CHECK-GI-NEXT: add x11, x15, x4 ; CHECK-GI-NEXT: and x14, x14, #0x1 -; CHECK-GI-NEXT: cset w15, hs -; CHECK-GI-NEXT: mul x5, x2, x8 -; CHECK-GI-NEXT: add x11, x11, x14 -; CHECK-GI-NEXT: and x14, x15, #0x1 -; CHECK-GI-NEXT: add x17, x9, x17 -; CHECK-GI-NEXT: add x14, x11, x14 -; CHECK-GI-NEXT: mov w11, #100 // =0x64 -; CHECK-GI-NEXT: umulh x13, x0, xzr -; CHECK-GI-NEXT: umulh x16, x2, x10 -; CHECK-GI-NEXT: adds x18, x18, x5 -; CHECK-GI-NEXT: mul x15, x3, x8 -; CHECK-GI-NEXT: add x13, x17, x13 -; CHECK-GI-NEXT: cset w17, hs -; CHECK-GI-NEXT: umulh x10, x3, x10 -; CHECK-GI-NEXT: add x13, x13, x14 -; CHECK-GI-NEXT: and x17, x17, #0x1 -; CHECK-GI-NEXT: cmn x18, x16 -; CHECK-GI-NEXT: sub x18, x2, x2 -; CHECK-GI-NEXT: umulh x16, x2, x8 +; 
CHECK-GI-NEXT: adds x13, x13, x16 +; CHECK-GI-NEXT: mul x4, x3, x10 +; CHECK-GI-NEXT: add x12, x12, x14 ; CHECK-GI-NEXT: cset w14, hs -; CHECK-GI-NEXT: and x14, x14, #0x1 -; CHECK-GI-NEXT: add x15, x15, x18 +; CHECK-GI-NEXT: adds x11, x13, x11 +; CHECK-GI-NEXT: and x13, x14, #0x1 +; CHECK-GI-NEXT: mul x15, x2, x8 +; CHECK-GI-NEXT: cset w14, hs +; CHECK-GI-NEXT: add x12, x12, x13 +; CHECK-GI-NEXT: and x13, x14, #0x1 +; CHECK-GI-NEXT: add x14, x9, x17 +; CHECK-GI-NEXT: sub x17, x2, x2 +; CHECK-GI-NEXT: umulh x16, x2, x10 +; CHECK-GI-NEXT: add x12, x12, x13 +; CHECK-GI-NEXT: add x13, x14, x18 +; CHECK-GI-NEXT: add x12, x13, x12 ; CHECK-GI-NEXT: and x18, xzr, #0x1 -; CHECK-GI-NEXT: add x14, x17, x14 +; CHECK-GI-NEXT: mul x5, x3, x8 +; CHECK-GI-NEXT: extr x11, x12, x11, #4 +; CHECK-GI-NEXT: adds x13, x4, x15 +; CHECK-GI-NEXT: umulh x14, x3, x10 +; CHECK-GI-NEXT: cset w15, hs +; CHECK-GI-NEXT: mov w10, #100 // =0x64 +; CHECK-GI-NEXT: cmn x13, x16 +; CHECK-GI-NEXT: and x15, x15, #0x1 +; CHECK-GI-NEXT: umulh x13, x2, x8 +; CHECK-GI-NEXT: cset w16, hs +; CHECK-GI-NEXT: add x17, x5, x17 +; CHECK-GI-NEXT: and x16, x16, #0x1 ; CHECK-GI-NEXT: umulh x8, x3, x8 +; CHECK-GI-NEXT: add x15, x15, x16 +; CHECK-GI-NEXT: adds x14, x17, x14 ; CHECK-GI-NEXT: and x17, xzr, #0x1 -; CHECK-GI-NEXT: adds x10, x15, x10 -; CHECK-GI-NEXT: add x15, x17, x18 +; CHECK-GI-NEXT: add x16, x18, x17 ; CHECK-GI-NEXT: cset w17, hs -; CHECK-GI-NEXT: umulh x18, x2, xzr +; CHECK-GI-NEXT: adds x13, x14, x13 +; CHECK-GI-NEXT: umulh x14, x2, xzr ; CHECK-GI-NEXT: and x17, x17, #0x1 -; CHECK-GI-NEXT: adds x10, x10, x16 -; CHECK-GI-NEXT: lsl x16, x13, #60 -; CHECK-GI-NEXT: add x15, x15, x17 -; CHECK-GI-NEXT: cset w17, hs -; CHECK-GI-NEXT: adds x10, x10, x14 -; CHECK-GI-NEXT: and x14, x17, #0x1 +; CHECK-GI-NEXT: cset w18, hs +; CHECK-GI-NEXT: adds x13, x13, x15 +; CHECK-GI-NEXT: add x15, x16, x17 +; CHECK-GI-NEXT: and x16, x18, #0x1 ; CHECK-GI-NEXT: cset w17, hs ; CHECK-GI-NEXT: add x8, x9, x8 -; CHECK-GI-NEXT: add 
x14, x15, x14 -; CHECK-GI-NEXT: and x15, x17, #0x1 -; CHECK-GI-NEXT: orr x12, x16, x12, lsr #4 -; CHECK-GI-NEXT: add x9, x14, x15 -; CHECK-GI-NEXT: add x8, x8, x18 -; CHECK-GI-NEXT: add x8, x8, x9 -; CHECK-GI-NEXT: lsr x9, x13, #4 -; CHECK-GI-NEXT: umulh x14, x12, x11 -; CHECK-GI-NEXT: lsl x13, x8, #60 +; CHECK-GI-NEXT: add x15, x15, x16 +; CHECK-GI-NEXT: and x16, x17, #0x1 +; CHECK-GI-NEXT: lsr x9, x12, #4 +; CHECK-GI-NEXT: add x15, x15, x16 +; CHECK-GI-NEXT: umulh x17, x11, x10 +; CHECK-GI-NEXT: add x8, x8, x14 +; CHECK-GI-NEXT: add x8, x8, x15 +; CHECK-GI-NEXT: mul x11, x11, x10 +; CHECK-GI-NEXT: extr x12, x8, x13, #4 ; CHECK-GI-NEXT: lsr x8, x8, #4 -; CHECK-GI-NEXT: mul x12, x12, x11 -; CHECK-GI-NEXT: orr x10, x13, x10, lsr #4 -; CHECK-GI-NEXT: madd x9, x9, x11, x14 -; CHECK-GI-NEXT: umulh x13, x10, x11 -; CHECK-GI-NEXT: subs x0, x0, x12 -; CHECK-GI-NEXT: mul x10, x10, x11 +; CHECK-GI-NEXT: madd x9, x9, x10, x17 +; CHECK-GI-NEXT: umulh x13, x12, x10 +; CHECK-GI-NEXT: subs x0, x0, x11 +; CHECK-GI-NEXT: mul x12, x12, x10 ; CHECK-GI-NEXT: sbc x1, x1, x9 -; CHECK-GI-NEXT: madd x8, x8, x11, x13 -; CHECK-GI-NEXT: subs x2, x2, x10 +; CHECK-GI-NEXT: madd x8, x8, x10, x13 +; CHECK-GI-NEXT: subs x2, x2, x12 ; CHECK-GI-NEXT: sbc x3, x3, x8 ; CHECK-GI-NEXT: ret entry: