diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-11-08 04:00:31 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-11-08 04:00:31 +0000 |
| commit | 65e6d0b758eb72bb0b960555ca6eb6ed6d01d79d (patch) | |
| tree | 4862758c1ce778ee60c49eb5be6bd4fc154b27b4 | |
| parent | b832ee68b4577d9ec676ef05fde1c93e02170cb1 (diff) | |
| download | bcm5719-llvm-65e6d0b758eb72bb0b960555ca6eb6ed6d01d79d.tar.gz bcm5719-llvm-65e6d0b758eb72bb0b960555ca6eb6ed6d01d79d.zip | |
[X86] Add patterns to fold EVEX store with EVEX encoded vcvtps2ph instructions. Remove bad pattern that had vf432 vcvtps2ph storing 128-bits.
llvm-svn: 317662
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 34 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/f16c-intrinsics.ll | 12 |
2 files changed, 27 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 4aa57f4c050..84b44ac677b 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -7225,17 +7225,16 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)), NoItinerary, 0, 0>, AVX512AIi8Base; - def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), - (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1), - (i32 imm:$src2))), - addr:$dst)]>; - let hasSideEffects = 0, mayStore = 1 in - def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), - (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", - []>, EVEX_K; + let hasSideEffects = 0, mayStore = 1 in { + def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>; + def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", + []>, EVEX_K; + } } multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> { let hasSideEffects = 0 in @@ -7255,6 +7254,19 @@ let Predicates = [HasAVX512] in { defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; } + + def : Pat<(store (f64 (extractelt + (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), + (iPTR 0))), addr:$dst), + (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; + def : Pat<(store (i64 (extractelt + (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), + (iPTR 0))), addr:$dst), + (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; + def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst), + (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>; + def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst), + (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>; } // Patterns for matching conversions from float to half-float and vice versa. diff --git a/llvm/test/CodeGen/X86/f16c-intrinsics.ll b/llvm/test/CodeGen/X86/f16c-intrinsics.ll index c0fa94cefaf..64f8fd0ca8d 100644 --- a/llvm/test/CodeGen/X86/f16c-intrinsics.ll +++ b/llvm/test/CodeGen/X86/f16c-intrinsics.ll @@ -303,14 +303,12 @@ define void @test_x86_vcvtps2ph_128_m2(double* nocapture %hf4x16, <4 x float> %f ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2: ; X32-AVX512VL: # BB#0: # %entry ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] -; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] +; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03] ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2: ; X64-AVX512VL: # BB#0: # %entry -; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] -; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07] +; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3) @@ -335,14 +333,12 @@ define void @test_x86_vcvtps2ph_128_m3(i64* nocapture %hf4x16, <4 x float> %f4x3 ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3: ; X32-AVX512VL: # BB#0: # %entry ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] -; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] +; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03] ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3: ; X64-AVX512VL: # BB#0: # %entry -; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03] -; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07] +; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3) |

