-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AArch64][GlobalISel] Add a constant funnel shift post-legalizer combine. #151912
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
…ine. We want to be able to produce extr instructions post-legalization. They are legal for scalars, acting as funnel shifts with a constant shift amount. Unfortunately I'm not sure whether there is currently a way to represent that in the legalization rules, but it might be useful for several operations - to be able to treat and test operations with constant operands as legal or not. This adds a change to the existing matchOrShiftToFunnelShift so that AArch64 can generate such instructions post-legalization, provided that the operation is scalar and the shift amount is constant. It doesn't feel like the best solution - any thoughts on alternatives?
This adds a change to the existing matchOrShiftToFunnelShift so that AArch64 can generate such instructions post-legalization providing that the operation is scalar and the shift amount is constant. It doesn't feel like the best solution - any thoughts on alternatives? |
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) Changes: We want to be able to produce extr instructions post-legalization. They are legal for scalars, acting as funnel shifts with a constant shift amount. Unfortunately I'm not sure whether there is currently a way to represent that in the legalization rules, but it might be useful for several operations - to be able to treat and test operations with constant operands as legal or not. This adds a change to the existing matchOrShiftToFunnelShift so that AArch64 can generate such instructions post-legalization, provided that the operation is scalar and the shift amount is constant. It doesn't feel like the best solution - any thoughts on alternatives? Patch is 39.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151912.diff 8 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index da829046cc421..9051fd0e4474c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -627,7 +627,8 @@ class CombinerHelper {
/// This variant does not erase \p MI after calling the build function.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const;
- bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo) const;
+ bool matchOrShiftToFunnelShift(MachineInstr &MI, bool ScalarConstantsAreLegal,
+ BuildFnTy &MatchInfo) const;
bool matchFunnelShiftToRotate(MachineInstr &MI) const;
void applyFunnelShiftToRotate(MachineInstr &MI) const;
bool matchRotateOutOfRange(MachineInstr &MI) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index b619de39a8c75..c417da7c8b88f 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1000,10 +1000,18 @@ def extract_vec_elt_combines : GICombineGroup<[
def funnel_shift_from_or_shift : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_OR):$root,
- [{ return Helper.matchOrShiftToFunnelShift(*${root}, ${info}); }]),
+ [{ return Helper.matchOrShiftToFunnelShift(*${root}, false, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])
>;
+def funnel_shift_from_or_shift_constants_are_legal : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_OR):$root,
+ [{ return Helper.matchOrShiftToFunnelShift(*${root}, true, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])
+>;
+
+
def funnel_shift_to_rotate : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_FSHL, G_FSHR):$root,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index e84ba91c47c8b..80a58b0bf2858 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4390,6 +4390,7 @@ void CombinerHelper::applyBuildFnNoErase(
}
bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
+ bool ScalarConstantsAreLegal,
BuildFnTy &MatchInfo) const {
assert(MI.getOpcode() == TargetOpcode::G_OR);
@@ -4409,31 +4410,29 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
// Given constants C0 and C1 such that C0 + C1 is bit-width:
// (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
- int64_t CstShlAmt, CstLShrAmt;
+ int64_t CstShlAmt = 0, CstLShrAmt;
if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
CstShlAmt + CstLShrAmt == BitWidth) {
FshOpc = TargetOpcode::G_FSHR;
Amt = LShrAmt;
-
} else if (mi_match(LShrAmt, MRI,
m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
ShlAmt == Amt) {
// (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
FshOpc = TargetOpcode::G_FSHL;
-
} else if (mi_match(ShlAmt, MRI,
m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
LShrAmt == Amt) {
// (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
FshOpc = TargetOpcode::G_FSHR;
-
} else {
return false;
}
LLT AmtTy = MRI.getType(Amt);
- if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
+ if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
+ (!ScalarConstantsAreLegal || CstShlAmt == 0 || !Ty.isScalar()))
return false;
MatchInfo = [=](MachineIRBuilder &B) {
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 99f0af5f6a3f8..81de366c32e72 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -367,5 +367,6 @@ def AArch64PostLegalizerCombiner
select_to_minmax, or_to_bsp, combine_concat_vector,
commute_constant_to_rhs, extract_vec_elt_combines,
push_freeze_to_prevent_poison_from_propagating,
- combine_mul_cmlt, combine_use_vector_truncate, extmultomull]> {
+ combine_mul_cmlt, combine_use_vector_truncate, extmultomull,
+ funnel_shift_from_or_shift_constants_are_legal]> {
}
diff --git a/llvm/test/CodeGen/AArch64/adc.ll b/llvm/test/CodeGen/AArch64/adc.ll
index 12e8bf26c9eac..03f3cf192102d 100644
--- a/llvm/test/CodeGen/AArch64/adc.ll
+++ b/llvm/test/CodeGen/AArch64/adc.ll
@@ -71,9 +71,8 @@ define i128 @test_shifted(i128 %a, i128 %b) {
;
; CHECK-GI-LABEL: test_shifted:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: lsr x8, x2, #19
+; CHECK-GI-NEXT: extr x8, x3, x2, #19
; CHECK-GI-NEXT: adds x0, x0, x2, lsl #45
-; CHECK-GI-NEXT: orr x8, x8, x3, lsl #45
; CHECK-GI-NEXT: adc x1, x1, x8
; CHECK-GI-NEXT: ret
%rhs = shl i128 %b, 45
@@ -108,8 +107,7 @@ define i128 @test_extended(i128 %a, i16 %b) {
; CHECK-GI-NEXT: sxth x8, w2
; CHECK-GI-NEXT: adds x0, x0, w2, sxth #3
; CHECK-GI-NEXT: asr x9, x8, #63
-; CHECK-GI-NEXT: lsr x8, x8, #61
-; CHECK-GI-NEXT: orr x8, x8, x9, lsl #3
+; CHECK-GI-NEXT: extr x8, x9, x8, #61
; CHECK-GI-NEXT: adc x1, x1, x8
; CHECK-GI-NEXT: ret
%ext = sext i16 %b to i128
diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll
index 4c28c90824028..9eb2e3de2b2b6 100644
--- a/llvm/test/CodeGen/AArch64/fsh.ll
+++ b/llvm/test/CodeGen/AArch64/fsh.ll
@@ -510,41 +510,40 @@ define i128 @fshl_i128(i128 %a, i128 %b, i128 %c) {
;
; CHECK-GI-LABEL: fshl_i128:
; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #64 // =0x40
; CHECK-GI-NEXT: and x9, x4, #0x7f
-; CHECK-GI-NEXT: mov w10, #64 // =0x40
-; CHECK-GI-NEXT: lsl x14, x3, #63
-; CHECK-GI-NEXT: sub x12, x10, x9
+; CHECK-GI-NEXT: mov w10, #127 // =0x7f
+; CHECK-GI-NEXT: sub x12, x8, x9
; CHECK-GI-NEXT: lsl x13, x1, x9
-; CHECK-GI-NEXT: mov w8, #127 // =0x7f
+; CHECK-GI-NEXT: bic x10, x10, x4
; CHECK-GI-NEXT: lsr x12, x0, x12
-; CHECK-GI-NEXT: bic x8, x8, x4
-; CHECK-GI-NEXT: sub x15, x9, #64
+; CHECK-GI-NEXT: sub x14, x9, #64
+; CHECK-GI-NEXT: lsl x15, x0, x9
+; CHECK-GI-NEXT: extr x16, x3, x2, #1
; CHECK-GI-NEXT: cmp x9, #64
-; CHECK-GI-NEXT: lsl x9, x0, x9
-; CHECK-GI-NEXT: lsl x15, x0, x15
-; CHECK-GI-NEXT: orr x12, x12, x13
-; CHECK-GI-NEXT: orr x13, x14, x2, lsr #1
-; CHECK-GI-NEXT: lsr x14, x3, #1
-; CHECK-GI-NEXT: sub x10, x10, x8
-; CHECK-GI-NEXT: sub x16, x8, #64
-; CHECK-GI-NEXT: csel x9, x9, xzr, lo
-; CHECK-GI-NEXT: lsr x17, x13, x8
-; CHECK-GI-NEXT: lsl x10, x14, x10
-; CHECK-GI-NEXT: csel x12, x12, x15, lo
+; CHECK-GI-NEXT: sub x8, x8, x10
+; CHECK-GI-NEXT: orr x9, x12, x13
+; CHECK-GI-NEXT: lsr x12, x3, #1
+; CHECK-GI-NEXT: lsl x13, x0, x14
+; CHECK-GI-NEXT: csel x14, x15, xzr, lo
+; CHECK-GI-NEXT: sub x15, x10, #64
+; CHECK-GI-NEXT: lsr x17, x16, x10
+; CHECK-GI-NEXT: lsl x8, x12, x8
+; CHECK-GI-NEXT: csel x9, x9, x13, lo
; CHECK-GI-NEXT: tst x4, #0x7f
-; CHECK-GI-NEXT: lsr x15, x14, x16
+; CHECK-GI-NEXT: lsr x13, x12, x15
; CHECK-GI-NEXT: mvn x11, x4
-; CHECK-GI-NEXT: csel x12, x1, x12, eq
-; CHECK-GI-NEXT: orr x10, x17, x10
-; CHECK-GI-NEXT: cmp x8, #64
-; CHECK-GI-NEXT: lsr x14, x14, x8
-; CHECK-GI-NEXT: csel x10, x10, x15, lo
+; CHECK-GI-NEXT: csel x9, x1, x9, eq
+; CHECK-GI-NEXT: orr x8, x17, x8
+; CHECK-GI-NEXT: cmp x10, #64
+; CHECK-GI-NEXT: lsr x12, x12, x10
+; CHECK-GI-NEXT: csel x8, x8, x13, lo
; CHECK-GI-NEXT: tst x11, #0x7f
-; CHECK-GI-NEXT: csel x10, x13, x10, eq
-; CHECK-GI-NEXT: cmp x8, #64
-; CHECK-GI-NEXT: csel x8, x14, xzr, lo
-; CHECK-GI-NEXT: orr x0, x9, x10
-; CHECK-GI-NEXT: orr x1, x12, x8
+; CHECK-GI-NEXT: csel x8, x16, x8, eq
+; CHECK-GI-NEXT: cmp x10, #64
+; CHECK-GI-NEXT: csel x10, x12, xzr, lo
+; CHECK-GI-NEXT: orr x0, x14, x8
+; CHECK-GI-NEXT: orr x1, x9, x10
; CHECK-GI-NEXT: ret
entry:
%d = call i128 @llvm.fshl(i128 %a, i128 %b, i128 %c)
@@ -571,41 +570,40 @@ define i128 @fshr_i128(i128 %a, i128 %b, i128 %c) {
;
; CHECK-GI-LABEL: fshr_i128:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: lsr x8, x0, #63
-; CHECK-GI-NEXT: mov w9, #127 // =0x7f
-; CHECK-GI-NEXT: mov w10, #64 // =0x40
-; CHECK-GI-NEXT: bic x9, x9, x4
-; CHECK-GI-NEXT: lsl x11, x0, #1
-; CHECK-GI-NEXT: and x12, x4, #0x7f
-; CHECK-GI-NEXT: orr x8, x8, x1, lsl #1
-; CHECK-GI-NEXT: sub x14, x10, x9
-; CHECK-GI-NEXT: sub x17, x9, #64
-; CHECK-GI-NEXT: lsl x15, x11, x9
-; CHECK-GI-NEXT: lsr x14, x11, x14
-; CHECK-GI-NEXT: cmp x9, #64
-; CHECK-GI-NEXT: lsl x16, x8, x9
-; CHECK-GI-NEXT: sub x9, x10, x12
-; CHECK-GI-NEXT: lsl x10, x11, x17
-; CHECK-GI-NEXT: mvn x13, x4
-; CHECK-GI-NEXT: csel x11, x15, xzr, lo
-; CHECK-GI-NEXT: sub x15, x12, #64
-; CHECK-GI-NEXT: orr x14, x14, x16
-; CHECK-GI-NEXT: lsr x16, x2, x12
-; CHECK-GI-NEXT: lsl x9, x3, x9
-; CHECK-GI-NEXT: csel x10, x14, x10, lo
-; CHECK-GI-NEXT: tst x13, #0x7f
-; CHECK-GI-NEXT: lsr x13, x3, x15
-; CHECK-GI-NEXT: csel x8, x8, x10, eq
-; CHECK-GI-NEXT: orr x9, x16, x9
-; CHECK-GI-NEXT: cmp x12, #64
-; CHECK-GI-NEXT: lsr x10, x3, x12
-; CHECK-GI-NEXT: csel x9, x9, x13, lo
+; CHECK-GI-NEXT: mov w8, #127 // =0x7f
+; CHECK-GI-NEXT: lsl x9, x0, #1
+; CHECK-GI-NEXT: extr x10, x1, x0, #63
+; CHECK-GI-NEXT: bic x8, x8, x4
+; CHECK-GI-NEXT: mov w11, #64 // =0x40
+; CHECK-GI-NEXT: and x14, x4, #0x7f
+; CHECK-GI-NEXT: sub x12, x11, x8
+; CHECK-GI-NEXT: lsl x13, x10, x8
+; CHECK-GI-NEXT: lsl x16, x9, x8
+; CHECK-GI-NEXT: lsr x12, x9, x12
+; CHECK-GI-NEXT: sub x17, x8, #64
+; CHECK-GI-NEXT: cmp x8, #64
+; CHECK-GI-NEXT: lsl x8, x9, x17
+; CHECK-GI-NEXT: sub x11, x11, x14
+; CHECK-GI-NEXT: mvn x15, x4
+; CHECK-GI-NEXT: orr x12, x12, x13
+; CHECK-GI-NEXT: csel x9, x16, xzr, lo
+; CHECK-GI-NEXT: sub x13, x14, #64
+; CHECK-GI-NEXT: lsr x16, x2, x14
+; CHECK-GI-NEXT: lsl x11, x3, x11
+; CHECK-GI-NEXT: csel x8, x12, x8, lo
+; CHECK-GI-NEXT: tst x15, #0x7f
+; CHECK-GI-NEXT: lsr x12, x3, x13
+; CHECK-GI-NEXT: csel x8, x10, x8, eq
+; CHECK-GI-NEXT: orr x10, x16, x11
+; CHECK-GI-NEXT: cmp x14, #64
+; CHECK-GI-NEXT: lsr x11, x3, x14
+; CHECK-GI-NEXT: csel x10, x10, x12, lo
; CHECK-GI-NEXT: tst x4, #0x7f
-; CHECK-GI-NEXT: csel x9, x2, x9, eq
-; CHECK-GI-NEXT: cmp x12, #64
-; CHECK-GI-NEXT: csel x10, x10, xzr, lo
-; CHECK-GI-NEXT: orr x0, x11, x9
-; CHECK-GI-NEXT: orr x1, x8, x10
+; CHECK-GI-NEXT: csel x10, x2, x10, eq
+; CHECK-GI-NEXT: cmp x14, #64
+; CHECK-GI-NEXT: csel x11, x11, xzr, lo
+; CHECK-GI-NEXT: orr x0, x9, x10
+; CHECK-GI-NEXT: orr x1, x8, x11
; CHECK-GI-NEXT: ret
entry:
%d = call i128 @llvm.fshr(i128 %a, i128 %b, i128 %c)
@@ -720,10 +718,9 @@ define i128 @rotl_i128_c(i128 %a) {
;
; CHECK-GI-LABEL: rotl_i128_c:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: lsr x8, x0, #61
-; CHECK-GI-NEXT: lsr x9, x1, #61
-; CHECK-GI-NEXT: orr x1, x8, x1, lsl #3
-; CHECK-GI-NEXT: orr x0, x9, x0, lsl #3
+; CHECK-GI-NEXT: extr x8, x1, x0, #61
+; CHECK-GI-NEXT: extr x0, x0, x1, #61
+; CHECK-GI-NEXT: mov x1, x8
; CHECK-GI-NEXT: ret
entry:
%d = call i128 @llvm.fshl(i128 %a, i128 %a, i128 3)
@@ -731,20 +728,12 @@ entry:
}
define i128 @rotr_i128_c(i128 %a) {
-; CHECK-SD-LABEL: rotr_i128_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: extr x8, x1, x0, #3
-; CHECK-SD-NEXT: extr x1, x0, x1, #3
-; CHECK-SD-NEXT: mov x0, x8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: rotr_i128_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: lsl x8, x1, #61
-; CHECK-GI-NEXT: lsl x9, x0, #61
-; CHECK-GI-NEXT: orr x0, x8, x0, lsr #3
-; CHECK-GI-NEXT: orr x1, x9, x1, lsr #3
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: rotr_i128_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: extr x8, x1, x0, #3
+; CHECK-NEXT: extr x1, x0, x1, #3
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
entry:
%d = call i128 @llvm.fshr(i128 %a, i128 %a, i128 3)
ret i128 %d
@@ -868,10 +857,8 @@ define i128 @fshl_i128_c(i128 %a, i128 %b) {
;
; CHECK-GI-LABEL: fshl_i128_c:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: lsr x8, x0, #61
-; CHECK-GI-NEXT: lsr x9, x3, #61
-; CHECK-GI-NEXT: orr x1, x8, x1, lsl #3
-; CHECK-GI-NEXT: orr x0, x9, x0, lsl #3
+; CHECK-GI-NEXT: extr x1, x1, x0, #61
+; CHECK-GI-NEXT: extr x0, x0, x3, #61
; CHECK-GI-NEXT: ret
entry:
%d = call i128 @llvm.fshl(i128 %a, i128 %b, i128 3)
@@ -879,21 +866,12 @@ entry:
}
define i128 @fshr_i128_c(i128 %a, i128 %b) {
-; CHECK-SD-LABEL: fshr_i128_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: extr x8, x3, x2, #3
-; CHECK-SD-NEXT: extr x1, x0, x3, #3
-; CHECK-SD-NEXT: mov x0, x8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fshr_i128_c:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: lsl x8, x3, #61
-; CHECK-GI-NEXT: lsr x9, x3, #3
-; CHECK-GI-NEXT: orr x8, x8, x2, lsr #3
-; CHECK-GI-NEXT: orr x1, x9, x0, lsl #61
-; CHECK-GI-NEXT: mov x0, x8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fshr_i128_c:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: extr x8, x3, x2, #3
+; CHECK-NEXT: extr x1, x0, x3, #3
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
entry:
%d = call i128 @llvm.fshr(i128 %a, i128 %b, i128 3)
ret i128 %d
@@ -3012,75 +2990,73 @@ define <2 x i128> @fshl_v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) {
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
; CHECK-GI-NEXT: .cfi_offset w19, -16
; CHECK-GI-NEXT: ldr x11, [sp, #16]
-; CHECK-GI-NEXT: mov w10, #64 // =0x40
+; CHECK-GI-NEXT: mov w9, #64 // =0x40
; CHECK-GI-NEXT: ldr x12, [sp, #32]
; CHECK-GI-NEXT: mov w13, #127 // =0x7f
-; CHECK-GI-NEXT: and x9, x11, #0x7f
+; CHECK-GI-NEXT: and x8, x11, #0x7f
; CHECK-GI-NEXT: and x14, x12, #0x7f
-; CHECK-GI-NEXT: mvn x15, x11
-; CHECK-GI-NEXT: sub x8, x10, x9
-; CHECK-GI-NEXT: sub x16, x9, #64
-; CHECK-GI-NEXT: lsl x19, x1, x9
-; CHECK-GI-NEXT: lsr x18, x0, x8
-; CHECK-GI-NEXT: lsl x17, x0, x9
-; CHECK-GI-NEXT: lsl x16, x0, x16
-; CHECK-GI-NEXT: cmp x9, #64
-; CHECK-GI-NEXT: bic x0, x13, x11
-; CHECK-GI-NEXT: mvn x8, x12
-; CHECK-GI-NEXT: orr x18, x18, x19
-; CHECK-GI-NEXT: csel x9, x17, xzr, lo
+; CHECK-GI-NEXT: mvn x18, x11
+; CHECK-GI-NEXT: sub x10, x9, x8
+; CHECK-GI-NEXT: sub x15, x8, #64
+; CHECK-GI-NEXT: lsl x17, x1, x8
+; CHECK-GI-NEXT: lsr x16, x0, x10
+; CHECK-GI-NEXT: lsl x15, x0, x15
+; CHECK-GI-NEXT: cmp x8, #64
+; CHECK-GI-NEXT: lsl x19, x0, x8
+; CHECK-GI-NEXT: lsl x0, x3, x14
+; CHECK-GI-NEXT: mvn x10, x12
+; CHECK-GI-NEXT: orr x16, x16, x17
; CHECK-GI-NEXT: sub x17, x14, #64
-; CHECK-GI-NEXT: csel x16, x18, x16, lo
+; CHECK-GI-NEXT: csel x15, x16, x15, lo
+; CHECK-GI-NEXT: sub x16, x9, x14
+; CHECK-GI-NEXT: csel x8, x19, xzr, lo
+; CHECK-GI-NEXT: lsr x16, x2, x16
; CHECK-GI-NEXT: tst x11, #0x7f
-; CHECK-GI-NEXT: sub x11, x10, x14
-; CHECK-GI-NEXT: lsr x11, x2, x11
-; CHECK-GI-NEXT: lsl x18, x3, x14
-; CHECK-GI-NEXT: csel x16, x1, x16, eq
-; CHECK-GI-NEXT: lsl x1, x2, x14
+; CHECK-GI-NEXT: lsl x19, x2, x14
; CHECK-GI-NEXT: lsl x17, x2, x17
+; CHECK-GI-NEXT: csel x15, x1, x15, eq
; CHECK-GI-NEXT: cmp x14, #64
-; CHECK-GI-NEXT: lsl x14, x5, #63
-; CHECK-GI-NEXT: orr x11, x11, x18
-; CHECK-GI-NEXT: bic x13, x13, x12
-; CHECK-GI-NEXT: csel x18, x1, xzr, lo
-; CHECK-GI-NEXT: csel x11, x11, x17, lo
+; CHECK-GI-NEXT: orr x16, x16, x0
+; CHECK-GI-NEXT: bic x11, x13, x11
+; CHECK-GI-NEXT: csel x14, x19, xzr, lo
+; CHECK-GI-NEXT: csel x16, x16, x17, lo
; CHECK-GI-NEXT: tst x12, #0x7f
-; CHECK-GI-NEXT: lsr x12, x5, #1
-; CHECK-GI-NEXT: orr x14, x14, x4, lsr #1
-; CHECK-GI-NEXT: lsl x17, x7, #63
-; CHECK-GI-NEXT: sub x1, x10, x0
-; CHECK-GI-NEXT: csel x11, x3, x11, eq
-; CHECK-GI-NEXT: sub x2, x0, #64
-; CHECK-GI-NEXT: lsr x3, x14, x0
-; CHECK-GI-NEXT: lsl x1, x12, x1
-; CHECK-GI-NEXT: lsr x4, x7, #1
-; CHECK-GI-NEXT: orr x17, x17, x6, lsr #1
-; CHECK-GI-NEXT: lsr x2, x12, x2
-; CHECK-GI-NEXT: cmp x0, #64
-; CHECK-GI-NEXT: orr x1, x3, x1
-; CHECK-GI-NEXT: sub x10, x10, x13
-; CHECK-GI-NEXT: lsr x12, x12, x0
-; CHECK-GI-NEXT: csel x1, x1, x2, lo
-; CHECK-GI-NEXT: tst x15, #0x7f
-; CHECK-GI-NEXT: sub x15, x13, #64
-; CHECK-GI-NEXT: lsr x2, x17, x13
-; CHECK-GI-NEXT: lsl x10, x4, x10
-; CHECK-GI-NEXT: csel x14, x14, x1, eq
-; CHECK-GI-NEXT: cmp x0, #64
-; CHECK-GI-NEXT: lsr x15, x4, x15
-; CHECK-GI-NEXT: lsr x0, x4, x13
-; CHECK-GI-NEXT: csel x12, x12, xzr, lo
-; CHECK-GI-NEXT: orr x10, x2, x10
-; CHECK-GI-NEXT: cmp x13, #64
-; CHECK-GI-NEXT: csel x10, x10, x15, lo
-; CHECK-GI-NEXT: tst x8, #0x7f
-; CHECK-GI-NEXT: orr x1, x16, x12
-; CHECK-GI-NEXT: csel x8, x17, x10, eq
-; CHECK-GI-NEXT: cmp x13, #64
-; CHECK-GI-NEXT: csel x10, x0, xzr, lo
-; CHECK-GI-NEXT: orr x0, x9, x14
-; CHECK-GI-NEXT: orr x2, x18, x8
-; CHECK-GI-NEXT: orr x3, x11, x10
+; CHECK-GI-NEXT: lsr x17, x5, #1
+; CHECK-GI-NEXT: extr x0, x5, x4, #1
+; CHECK-GI-NEXT: bic x12, x13, x12
+; CHECK-GI-NEXT: csel x13, x3, x16, eq
+; CHECK-GI-NEXT: sub x16, x9, x11
+; CHECK-GI-NEXT: sub x1, x11, #64
+; CHECK-GI-NEXT: lsr x3, x7, #1
+; CHECK-GI-NEXT: lsr x2, x0, x11
+; CHECK-GI-NEXT: lsl x16, x17, x16
+; CHECK-GI-NEXT: extr x4, x7, x6, #1
+; CHECK-GI-NEXT: lsr x1, x17, x1
+; CHECK-GI-NEXT: cmp x11, #64
+; CHECK-GI-NEXT: sub x9, x9, x12
+; CHECK-GI-NEXT: orr x16, x2, x16
+; CHECK-GI-NEXT: lsr x17, x17, x11
+; CHECK-GI-NEXT: lsl x9, x3, x9
+; CHECK-GI-NEXT: csel x16, x16, x1, lo
+; CHECK-GI-NEXT: tst x18, #0x7f
+; CHECK-GI-NEXT: sub x18, x12, #64
+; CHECK-GI-NEXT: lsr x1, x4, x12
+; CHECK-GI-NEXT: csel x16, x0, x16, eq
+; CHECK-GI-NEXT: cmp x11, #64
+; CHECK-GI-NEXT: lsr x11, x3, x18
+; CHECK-GI-NEXT: csel x17, x17, xzr, lo
+; CHECK-GI-NEXT: cmp x12, #64
+; CHECK-GI-NEXT: orr x9, x1, x9
+; CHECK-GI-NEXT: lsr x18, x3, x12
+; CHECK-GI-NEXT: orr x0, x8, x16
+; CHECK-GI-NEXT: csel x9, x9, x11, lo
+; CHECK-GI-NEXT: tst x10, #0x7f
+; CHECK-GI-NEXT: orr x1, x15, x17
+; CHECK-GI-NEXT: csel x9, x4, x9, eq
+; CHECK-GI-NEXT: cmp x12, #64
+; CHECK-GI-NEXT: csel x10, x18, xzr, lo
+; CHECK-GI-NEXT: orr x2, x14, x9
+; CHECK-GI-NEXT: orr x3, x13, x10
; CHECK-GI-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
@@ -3124,75 +3100,73 @@ define <2 x i128> @fshr_v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) {
; CHECK-GI-LABEL: fshr_v2i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr x9, [sp]
-; CHECK-GI-NEXT: lsl x12, x1, #1
-; CHECK-GI-NEXT: mov w11, #127 // =0x7f
-; CHECK-GI-NEXT: mov w14, #64 // =0x40
-; CHECK-GI-NEXT: lsl x15, x0, #1
+; CHECK-GI-NEXT: mov w10, #127 // =0x7f
+; CHECK-GI-NEXT: mov w12, #64 // =0x40
+; CHECK-GI-NEXT: lsl x13, x0, #1
+; CHECK-GI-NEXT: extr x14, x1, x0, #63
; CHECK-GI-NEXT: ldr x8, [sp, #16]
-; CHECK-GI-NEXT: bic x13, x11, x9
-; CHECK-GI-NEXT: orr x12, x12, x0, lsr #63
-; CHECK-GI-NEXT: lsl x1, x3, #1
-; CHECK-GI-NEXT: sub x17, x14, x13
-; CHECK-GI-NEXT: sub x18, x13, #64
-; CHECK-GI-NEXT: lsl x3, x15, x13
-; CHECK-GI-NEXT: lsr x17, x15, x17
-; CHECK-GI-NEXT: lsl x0, x12, x13
-; CHECK-GI-NEXT: lsl x15, x15, x18
-; CHECK-GI-NEXT: bic x11, x11, x8
+; CHECK-GI-NEXT: bic x11, x10, x9
+; CHECK-GI-NEXT: mvn x16, x9
+; CHECK-GI-NEXT: and x15, x9, #0x7f
+; CHECK-GI-NEXT: sub x17, x12, x11
+; CHECK-GI-NEXT: sub x18, x11, #64
+; CHECK-GI-NEXT: lsl x0, x14, x11
+; ...
[truncated]
|
We want to be able to produce extr instructions post-legalization. They are legal for scalars, acting as funnel shifts with a constant shift amount. Unfortunately I'm not sure whether there is currently a way to represent that in the legalization rules, but it might be useful for several operations - to be able to treat and test operations with constant operands as legal or not.
This adds a change to the existing matchOrShiftToFunnelShift so that AArch64 can generate such instructions post-legalization providing that the operation is scalar and the shift amount is constant. It doesn't feel like the best solution - any thoughts on alternatives?