diff options
author | Craig Topper <craig.topper@intel.com> | 2017-11-08 04:00:30 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2017-11-08 04:00:30 +0000 |
commit | b832ee68b4577d9ec676ef05fde1c93e02170cb1 (patch) | |
tree | ad0d4f451b37a276a01a42c8d81b2a4e483e7d8c /llvm/lib | |
parent | 5f903f3848bc783f66e4e88b835062ac5b301669 (diff) | |
download | bcm5719-llvm-b832ee68b4577d9ec676ef05fde1c93e02170cb1.tar.gz bcm5719-llvm-b832ee68b4577d9ec676ef05fde1c93e02170cb1.zip |
[X86] Allow the legacy vcvtps2ph intrinsics to select EVEX-encoded instructions, and rely on the EVEX->VEX compression pass to convert them back to the VEX encoding.
Missed store folding opportunities will be fixed in a subsequent commit.
llvm-svn: 317661
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 32 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 2 |
2 files changed, 16 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index eb5a3ddc58c..955a40ee171 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7698,11 +7698,11 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> { T8PD, VEX, Sched<[WriteCvtF2FLd]>; } -multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { +multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop> { def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), (ins RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>, + [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>, TAPD, VEX, Sched<[WriteCvtF2F]>; let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteCvtF2FLd, WriteRMW] in @@ -7715,6 +7715,8 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { let Predicates = [HasF16C, NoVLX] in { defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem>; defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L; + defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem>; + defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem>, VEX_L; // Pattern match vcvtph2ps of a scalar i64 load. 
def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))), @@ -7724,23 +7726,17 @@ let Predicates = [HasF16C, NoVLX] in { def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), (VCVTPH2PSrm addr:$src)>; -} - -let Predicates = [HasF16C] in { - defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>; - defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L; - def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16 - (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), - addr:$dst), - (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; - def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16 - (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), - addr:$dst), - (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; - def : Pat<(store (v8i16 (int_x86_vcvtps2ph_256 VR256:$src1, i32:$src2)), - addr:$dst), - (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>; + def : Pat<(store (f64 (extractelt + (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))), + (iPTR 0))), addr:$dst), + (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; + def : Pat<(store (i64 (extractelt + (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))), + (iPTR 0))), addr:$dst), + (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; + def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst), + (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>; } // Patterns for matching conversions from float to half-float and vice versa. 
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 0ed9d2fdee9..9edac22d5ba 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1588,6 +1588,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), + X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0), + X86_INTRINSIC_DATA(vcvtps2ph_256, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0), X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0), X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0), X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0), |