From 817186fca35ffce00ed68fc4ec84b2637a712e07 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 4 Aug 2025 07:48:06 +0100 Subject: [PATCH] [AArch64] Treat single-vector ext as legal shuffle masks. We can generate ext from shuffles like <2, 3, 0, 1> from a single vector source. Add handling to isShuffleMaskLegal to allow DAG combines to optimize to it. --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 5 +++-- llvm/test/CodeGen/AArch64/arm64-ext.ll | 10 ++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 2b6ea86ee1af5..40990d52ca8dc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13477,7 +13477,7 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT, // Look for the first non-undef element. const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; }); - // Benefit form APInt to handle overflow when calculating expected element. + // Benefit from APInt to handle overflow when calculating expected element. unsigned NumElts = VT.getVectorNumElements(); unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, /*isSigned=*/false, @@ -13485,7 +13485,7 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT, // The following shuffle indices must be the successive elements after the // first real element. 
bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](int Elt) { - return Elt != ExpectedElt++ && Elt != -1; + return Elt != ExpectedElt++ && Elt >= 0; }); if (FoundWrongElt) return false; @@ -15772,6 +15772,7 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { isREVMask(M, EltSize, NumElts, 32) || isREVMask(M, EltSize, NumElts, 16) || isEXTMask(M, VT, DummyBool, DummyUnsigned) || + isSingletonEXTMask(M, VT, DummyUnsigned) || isTRNMask(M, NumElts, DummyUnsigned) || isUZPMask(M, NumElts, DummyUnsigned) || isZIPMask(M, NumElts, DummyUnsigned) || diff --git a/llvm/test/CodeGen/AArch64/arm64-ext.ll b/llvm/test/CodeGen/AArch64/arm64-ext.ll index 8bf2b826d7101..c3670579c9148 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ext.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ext.ll @@ -139,9 +139,8 @@ define <2 x ptr> @test_v2p0(<2 x ptr> %a, <2 x ptr> %b) { define <16 x i8> @reverse_vector_s8x16b(<16 x i8> noundef %x) { ; CHECK-SD-LABEL: reverse_vector_s8x16b: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: rev64 v1.16b, v0.16b -; CHECK-SD-NEXT: ext v0.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: rev64 v0.16b, v0.16b +; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: reverse_vector_s8x16b: @@ -161,9 +160,8 @@ entry: define <8 x i16> @reverse_vector_s16x8b(<8 x i16> noundef %x) { ; CHECK-SD-LABEL: reverse_vector_s16x8b: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: rev64 v1.8h, v0.8h -; CHECK-SD-NEXT: ext v0.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: rev64 v0.8h, v0.8h +; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: reverse_vector_s16x8b: