Skip to content

Commit 867af26

Browse files
author
Evan Cheng
committed
Add a pattern to move the low element of a v4f32 and zero-extend the rest.
llvm-svn: 50922
1 parent 66da8b5 commit 867af26

File tree

2 files changed

+11
-0
lines changed

2 files changed

+11
-0
lines changed

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2768,6 +2768,8 @@ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
27682768
(MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
27692769
// Selection pattern: X86vzmovl applied to a v4f32 built by scalar_to_vector
// from an FR32 value is selected as MOVLSS2PSrr, with (V_SET0) as the first
// operand and the scalar $src as the second. Gated on HasSSE2.
// NOTE(review): V_SET0 is presumably the all-zero vector that supplies the
// zeroed upper elements -- confirm against its definition.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
27702770
(MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
2771+
// Selection pattern added by this commit: X86vzmovl applied directly to a
// v4f32 value already in a VR128 register (no scalar_to_vector wrapper) is
// selected as MOVLPSrr, with (V_SET0) as the first operand and the vector
// $src as the second. Per the commit message, the intent is to move the low
// element of a v4f32 and zero the remaining elements.
// NOTE(review): V_SET0 is presumably the all-zero vector -- confirm.
// NOTE(review): confirm HasSSE2 (rather than a lower SSE level) is the
// intended predicate for this v4f32 pattern.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
2772+
(MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE2]>;
27712773
}
27722774

27732775
// Splat v2f64 / v2i64

llvm/test/CodeGen/X86/vec_set-G.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss
2+
3+
; Test function @t: takes a <4 x float> %A, extracts its element 0, inserts
; that value into a constant vector whose lane 0 is 0.0 (other lanes undef),
; then writes 0.0 into lane 2 and stores the result. The RUN line feeds this
; file through llc with -march=x86 -mattr=+sse2 and greps the output for
; movss, so only the generated assembly is checked.
define fastcc void @t(<4 x float> %A) nounwind {
4+
; Read the low element (index 0) of the incoming vector.
%tmp41896 = extractelement <4 x float> %A, i32 0 ; <float> [#uses=1]
5+
; Place the extracted scalar at index 1 of < 0.0, undef, undef, undef >.
; NOTE(review): the element lands in lane 1, not lane 0 -- confirm this is
; the vector shape intended to exercise the new pattern.
%tmp14082 = insertelement <4 x float> < float 0.000000e+00, float undef, float undef, float undef >, float %tmp41896, i32 1 ; <<4 x float>> [#uses=1]
6+
; Set lane 2 to 0.0; lane 3 is left undef.
%tmp14083 = insertelement <4 x float> %tmp14082, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
7+
; Store the vector to a null address with 16-byte alignment; the store keeps
; the value live for code generation.
store <4 x float> %tmp14083, <4 x float>* null, align 16
8+
ret void
9+
}

0 commit comments

Comments
 (0)