Skip to content

Commit d550a31

Browse files
committed
[X86] Add patterns to fold EVEX store with EVEX encoded vcvtps2ph instructions. Remove bad pattern that had v4f32 vcvtps2ph storing 128 bits.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317662 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent c30df5f commit d550a31

File tree

2 files changed

+27
-19
lines changed

2 files changed

+27
-19
lines changed

lib/Target/X86/X86InstrAVX512.td

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7225,17 +7225,16 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
72257225
(X86cvtps2ph (_src.VT _src.RC:$src1),
72267226
(i32 imm:$src2)),
72277227
NoItinerary, 0, 0>, AVX512AIi8Base;
7228-
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
7229-
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
7230-
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7231-
[(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
7232-
(i32 imm:$src2))),
7233-
addr:$dst)]>;
7234-
let hasSideEffects = 0, mayStore = 1 in
7235-
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
7236-
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
7237-
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
7238-
[]>, EVEX_K;
7228+
let hasSideEffects = 0, mayStore = 1 in {
7229+
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
7230+
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
7231+
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7232+
[]>;
7233+
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
7234+
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
7235+
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
7236+
[]>, EVEX_K;
7237+
}
72397238
}
72407239
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
72417240
let hasSideEffects = 0 in
@@ -7255,6 +7254,19 @@ let Predicates = [HasAVX512] in {
72557254
defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
72567255
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
72577256
}
7257+
7258+
def : Pat<(store (f64 (extractelt
7259+
(bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7260+
(iPTR 0))), addr:$dst),
7261+
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7262+
def : Pat<(store (i64 (extractelt
7263+
(bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7264+
(iPTR 0))), addr:$dst),
7265+
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7266+
def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
7267+
(VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
7268+
def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
7269+
(VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
72587270
}
72597271

72607272
// Patterns for matching conversions from float to half-float and vice versa.

test/CodeGen/X86/f16c-intrinsics.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -303,14 +303,12 @@ define void @test_x86_vcvtps2ph_128_m2(double* nocapture %hf4x16, <4 x float> %f
303303
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
304304
; X32-AVX512VL: # BB#0: # %entry
305305
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
306-
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
307-
; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
306+
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
308307
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
309308
;
310309
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
311310
; X64-AVX512VL: # BB#0: # %entry
312-
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
313-
; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
311+
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
314312
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
315313
entry:
316314
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
@@ -335,14 +333,12 @@ define void @test_x86_vcvtps2ph_128_m3(i64* nocapture %hf4x16, <4 x float> %f4x3
335333
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
336334
; X32-AVX512VL: # BB#0: # %entry
337335
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
338-
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
339-
; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
336+
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
340337
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
341338
;
342339
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
343340
; X64-AVX512VL: # BB#0: # %entry
344-
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
345-
; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
341+
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
346342
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
347343
entry:
348344
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)

0 commit comments

Comments
 (0)