@@ -675,20 +675,21 @@ let Constraints = "$src1 = $dst" in {
 def MOVLPSrm : PSI<0x12, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                    "movlps\t{$src2, $dst|$dst, $src2}",
-                   [(set VR128:$dst,
-                     (v4f32 (vector_shuffle VR128:$src1,
-                       (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
-                       MOVLP_shuffle_mask)))]>;
+                   [(set VR128:$dst,
+                     (v4f32 (vector_shuffle VR128:$src1,
+                       (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
+                       MOVLP_shuffle_mask)))]>;
 def MOVHPSrm : PSI<0x16, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                    "movhps\t{$src2, $dst|$dst, $src2}",
-                   [(set VR128:$dst,
-                     (v4f32 (vector_shuffle VR128:$src1,
-                       (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
-                       MOVHP_shuffle_mask)))]>;
+                   [(set VR128:$dst,
+                     (v4f32 (vector_shuffle VR128:$src1,
+                       (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
+                       MOVHP_shuffle_mask)))]>;
 } // AddedComplexity
 } // Constraints = "$src1 = $dst"

+
 def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -2265,16 +2266,17 @@ def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),

 // Move to lower bits of a VR128 and zeroing upper bits.
 // Loading from memory automatically zeroing upper bits.
-let AddedComplexity = 20 in
-def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                      "movsd\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst,
-                        (v2f64 (X86vzmovl (v2f64 (scalar_to_vector
-                          (loadf64 addr:$src))))))]>;
+let AddedComplexity = 20 in {
+def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+                      "movsd\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                        (v2f64 (X86vzmovl (v2f64 (scalar_to_vector
+                          (loadf64 addr:$src))))))]>;

 def : Pat<(v2f64 (X86vzmovl (memopv2f64 addr:$src))),
-          (MOVZSD2PDrm addr:$src)>;
+          (MOVZSD2PDrm addr:$src)>;
 def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>;
+}

 // movd / movq to XMM register zero-extends
 let AddedComplexity = 15 in {
@@ -2301,9 +2303,9 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
                        (loadi64 addr:$src))))))]>, XS,
                    Requires<[HasSSE2]>;
-}

 def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+}

 // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
 // IA32 document. movq xmm1, xmm2 does clear the high bits.