Skip to content

[AArch64][ISel] Subvector extracts can use undef for second EXT input #151729

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15544,7 +15544,9 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,

assert(InVT.isScalableVector() && "Unexpected vector type!");
// Move requested subvector to the start of the vector and try again.
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, InVT, Vec, Vec, Idx);
// There's no need for a second input to vector_splice, so use undef there.
SDValue Splice =
DAG.getNode(ISD::VECTOR_SPLICE, DL, InVT, Vec, DAG.getUNDEF(InVT), Idx);
Comment on lines +15547 to +15549
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with using undef operands is that you lose control over dataflow. Whilst it frees the register allocator, it can lead to situations where the chosen register is the result of an independent long-latency instruction (e.g. fsqrt), at which point you end up worse off.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I guess I still need to reprogram my brain, because of course aarch64 CPUs have HW interlocks... So even if there is no logical dependency due to the undef, the HW will certainly see a dependency and wait for the register to be available.

I think I have to agree with you then, it's probably better to avoid undef and unexpected dependencies.

return convertFromScalableVector(DAG, VT, Splice);
}

Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SVE-NEXT: mov z1.s, p0/z, #1 // =0x1
; CHECK-SVE-NEXT: fmov s0, w8
; CHECK-SVE-NEXT: mov v0.s[1], v1.s[1]
; CHECK-SVE-NEXT: ext z1.b, z1.b, z1.b, #8
; CHECK-SVE-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SVE-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SVE-NEXT: b use
Expand All @@ -192,7 +192,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SVE2p1-NEXT: mov z1.s, p0/z, #1 // =0x1
; CHECK-SVE2p1-NEXT: fmov s0, w8
; CHECK-SVE2p1-NEXT: mov v0.s[1], v1.s[1]
; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z1.b, #8
; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-SVE2p1-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SVE2p1-NEXT: b use
Expand All @@ -204,10 +204,10 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SME2-NEXT: mov z1.s, p0/z, #1 // =0x1
; CHECK-SME2-NEXT: fmov s2, w8
; CHECK-SME2-NEXT: mov z0.s, z1.s[1]
; CHECK-SME2-NEXT: ext z1.b, z1.b, z1.b, #8
; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SME2-NEXT: zip1 z0.s, z2.s, z0.s
; CHECK-SME2-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SME2-NEXT: b use
%r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n)
%v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0)
Expand Down
52 changes: 26 additions & 26 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h
; VBITS_GE_256-NEXT: sunpklo z3.s, z0.h
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
Expand Down Expand Up @@ -210,7 +210,7 @@ define void @sdiv_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: sunpklo z2.s, z1.h
; CHECK-NEXT: sunpklo z3.s, z0.h
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: sunpklo z0.s, z0.h
Expand Down Expand Up @@ -239,24 +239,24 @@ define void @sdiv_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: sunpklo z2.h, z1.b
; CHECK-NEXT: sunpklo z3.h, z0.b
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
; CHECK-NEXT: sunpklo z1.h, z1.b
; CHECK-NEXT: sunpklo z4.s, z2.h
; CHECK-NEXT: sunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: sunpklo z2.s, z2.h
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s
; CHECK-NEXT: sunpklo z5.s, z0.h
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
; CHECK-NEXT: sunpklo z3.s, z1.h
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z5.s
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: sdiv z0.s, p1/m, z0.s, z1.s
Expand Down Expand Up @@ -398,7 +398,7 @@ define void @sdiv_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h
; VBITS_GE_256-NEXT: sunpklo z3.s, z0.h
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
Expand Down Expand Up @@ -476,7 +476,7 @@ define void @sdiv_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: sunpklo z2.s, z1.h
; CHECK-NEXT: sunpklo z3.s, z0.h
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: sunpklo z0.s, z0.h
Expand Down Expand Up @@ -858,7 +858,7 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: uunpklo z2.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z3.s, z0.h
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
Expand Down Expand Up @@ -930,12 +930,12 @@ define void @udiv_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x1]
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT: uunpklo z2.s, z0.h
; CHECK-NEXT: uunpklo z3.s, z1.h
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
; CHECK-NEXT: uunpklo z3.s, z1.h
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: udivr z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ptrue p1.h, vl64
; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
Expand All @@ -959,24 +959,24 @@ define void @udiv_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: uunpklo z2.h, z1.b
; CHECK-NEXT: uunpklo z3.h, z0.b
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
; CHECK-NEXT: uunpklo z1.h, z1.b
; CHECK-NEXT: uunpklo z4.s, z2.h
; CHECK-NEXT: uunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: uunpklo z2.s, z2.h
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s
; CHECK-NEXT: uunpklo z5.s, z0.h
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s
; CHECK-NEXT: uunpklo z3.s, z1.h
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z5.s
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: udiv z0.s, p1/m, z0.s, z1.s
Expand Down Expand Up @@ -1118,7 +1118,7 @@ define void @udiv_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: uunpklo z2.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z3.s, z0.h
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
Expand Down Expand Up @@ -1187,7 +1187,7 @@ define void @udiv_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpklo z3.s, z0.h
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z0.s, z0.h
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: sunpklo z4.s, z2.h
; VBITS_GE_256-NEXT: sunpklo z5.s, z3.h
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z0.b, #16
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z2.s, z2.h
; VBITS_GE_256-NEXT: sunpklo z3.s, z3.h
; VBITS_GE_256-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
Expand Down Expand Up @@ -222,8 +222,8 @@ define void @srem_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: sunpklo z3.h, z0.b
; CHECK-NEXT: sunpklo z4.s, z2.h
; CHECK-NEXT: sunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: sunpklo z2.s, z2.h
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s
Expand Down Expand Up @@ -254,24 +254,24 @@ define void @srem_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: sunpklo z3.h, z0.b
; CHECK-NEXT: sunpklo z4.s, z2.h
; CHECK-NEXT: sunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: sunpklo z2.s, z2.h
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s
; CHECK-NEXT: mov z5.d, z0.d
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #128
; CHECK-NEXT: sunpklo z5.h, z5.b
; CHECK-NEXT: sunpklo z7.s, z5.h
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #128
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #128
; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
; CHECK-NEXT: mov z3.d, z1.d
; CHECK-NEXT: sunpklo z5.s, z5.h
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
; CHECK-NEXT: sunpklo z3.h, z3.b
; CHECK-NEXT: sunpklo z6.s, z3.h
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z6.s, p1/m, z6.s, z7.s
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
Expand Down Expand Up @@ -425,7 +425,7 @@ define void @srem_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z3.d, z1.d
; VBITS_GE_256-NEXT: sunpklo z4.s, z4.h
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z1.b, #16
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z3.s, z3.h
; VBITS_GE_256-NEXT: sdivr z3.s, p1/m, z3.s, z4.s
; VBITS_GE_256-NEXT: ptrue p1.h, vl8
Expand Down Expand Up @@ -512,7 +512,7 @@ define void @srem_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
; CHECK-NEXT: mov z3.d, z1.d
; CHECK-NEXT: sunpklo z4.s, z4.h
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z4.s
; CHECK-NEXT: ptrue p1.h, vl64
Expand Down Expand Up @@ -947,8 +947,8 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: uunpklo z4.s, z2.h
; VBITS_GE_256-NEXT: uunpklo z5.s, z3.h
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z0.b, #16
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z2.s, z2.h
; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h
; VBITS_GE_256-NEXT: udivr z4.s, p0/m, z4.s, z5.s
Expand Down Expand Up @@ -1040,8 +1040,8 @@ define void @urem_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: uunpklo z3.h, z0.b
; CHECK-NEXT: uunpklo z4.s, z2.h
; CHECK-NEXT: uunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: uunpklo z2.s, z2.h
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s
Expand Down Expand Up @@ -1072,24 +1072,24 @@ define void @urem_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: uunpklo z3.h, z0.b
; CHECK-NEXT: uunpklo z4.s, z2.h
; CHECK-NEXT: uunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: uunpklo z2.s, z2.h
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s
; CHECK-NEXT: mov z5.d, z0.d
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #128
; CHECK-NEXT: uunpklo z5.h, z5.b
; CHECK-NEXT: uunpklo z7.s, z5.h
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #128
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #128
; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s
; CHECK-NEXT: mov z3.d, z1.d
; CHECK-NEXT: uunpklo z5.s, z5.h
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
; CHECK-NEXT: uunpklo z3.h, z3.b
; CHECK-NEXT: uunpklo z6.s, z3.h
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z6.s, p1/m, z6.s, z7.s
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
Expand Down Expand Up @@ -1243,7 +1243,7 @@ define void @urem_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: udivr z2.s, p1/m, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z3.d, z1.d
; VBITS_GE_256-NEXT: uunpklo z4.s, z4.h
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z1.b, #16
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h
; VBITS_GE_256-NEXT: udivr z3.s, p1/m, z3.s, z4.s
; VBITS_GE_256-NEXT: ptrue p1.h, vl8
Expand Down Expand Up @@ -1330,7 +1330,7 @@ define void @urem_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s
; CHECK-NEXT: mov z3.d, z1.d
; CHECK-NEXT: uunpklo z4.s, z4.h
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #128
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z4.s
; CHECK-NEXT: ptrue p1.h, vl64
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
Original file line number Diff line number Diff line change
Expand Up @@ -338,14 +338,14 @@ define void @masked_scatter_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1d { z4.d }, p1/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: uunpklo z2.d, z0.s
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: punpklo p0.h, p0.b
; VBITS_GE_256-NEXT: and p0.b, p0/z, p0.b, p1.b
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT: st1w { z2.d }, p0, [z3.d]
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: cmpne p0.d, p1/z, z1.d, #0
; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [z4.d]
; VBITS_GE_256-NEXT: ret
Expand Down Expand Up @@ -715,14 +715,14 @@ define void @masked_scatter_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1d { z4.d }, p1/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
; VBITS_GE_256-NEXT: uunpklo z2.d, z0.s
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: punpklo p0.h, p0.b
; VBITS_GE_256-NEXT: and p0.b, p0/z, p0.b, p1.b
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT: st1w { z2.d }, p0, [z3.d]
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: cmpne p0.d, p1/z, z1.d, #0
; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [z4.d]
; VBITS_GE_256-NEXT: ret
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) v
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #16
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x0]
; CHECK-NEXT: ret
%splat = shufflevector <2 x i32> %b, <2 x i32> poison, <8 x i32> zeroinitializer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) {
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpklo z3.s, z0.h
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z0.s, z0.h
Expand Down
Loading