diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-11-13 05:25:24 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-11-13 05:25:24 +0000 |
| commit | d4f6094091de107f9ce11d814d8e519f8cf8ac55 (patch) | |
| tree | 2123c9023b8de3538baa2a597026a527f05f1e00 /llvm/lib/Target | |
| parent | 24389c674666f1283f5f1821dae6187790f3a961 (diff) | |
| download | bcm5719-llvm-d4f6094091de107f9ce11d814d8e519f8cf8ac55.tar.gz bcm5719-llvm-d4f6094091de107f9ce11d814d8e519f8cf8ac55.zip | |
[X86] Fix SQRTSS/SQRTSD/RCPSS/RCPSD intrinsics to use sse_load_f32/sse_load_f64 to increase load folding opportunities.
llvm-svn: 318016
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 16 |
2 files changed, 13 insertions, 10 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 8d677be9ea2..12b37874485 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7588,11 +7588,10 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                     (_.VT _.RC:$src2),
                                     (i32 FROUND_CURRENT))>;
   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                         (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (X86fsqrtRnds (_.VT _.RC:$src1),
-                                    (_.VT (scalar_to_vector
-                                              (_.ScalarLdFrag addr:$src2))),
+                                    _.ScalarIntMemCPat:$src2,
                                     (i32 FROUND_CURRENT))>;
 
   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -7630,7 +7629,7 @@ let Predicates = [HasAVX512, OptForSize] in {
               (!cast<Instruction>(NAME#SUFF#Zm)
                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
 
-    def : Pat<(Intr (scalar_to_vector (_.EltVT (load addr:$src2)))),
+    def : Pat<(Intr _.ScalarIntMemCPat:$src2),
               (!cast<Instruction>(NAME#SUFF#Zm_Int)
                     (_.VT (IMPLICIT_DEF)), addr:$src2)>;
   }
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index cb512848e41..dc1eb3e8963 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3040,6 +3040,7 @@ def SSE_RCPS : OpndItins<
 multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           ValueType vt, ValueType ScalarVT,
                           X86MemOperand x86memop,
+                          Operand intmemop, ComplexPattern int_cpat,
                           Intrinsic Intr,
                           SDNode OpNode, Domain d, OpndItins itins,
                           Predicate target, string Suffix> {
@@ -3060,7 +3061,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
             []>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
   let mayLoad = 1 in
-  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src2),
+  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
             []>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
@@ -3080,7 +3081,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
   // which has a clobber before the rcp, vs.
   // rcpss mem, %xmm0
   let Predicates = [target, OptForSize] in {
-    def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))),
+    def : Pat<(Intr int_cpat:$src2),
                (!cast<Instruction>(NAME#Suffix##m_Int)
                       (vt (IMPLICIT_DEF)), addr:$src2)>;
   }
@@ -3089,6 +3090,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
 multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           ValueType vt, ValueType ScalarVT,
                           X86MemOperand x86memop,
+                          Operand intmemop, ComplexPattern int_cpat,
                           Intrinsic Intr, SDNode OpNode, Domain d,
                           OpndItins itins, Predicate target, string Suffix> {
   let hasSideEffects = 0 in {
@@ -3106,7 +3108,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
              []>, Sched<[itins.Sched.Folded]>;
   let mayLoad = 1 in
   def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
-                (ins VR128:$src1, x86memop:$src2),
+                (ins VR128:$src1, intmemop:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              []>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
@@ -3129,7 +3131,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                                  VR128:$src)>;
   }
   let Predicates = [target, OptForSize] in {
-    def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))),
+    def : Pat<(Intr int_cpat:$src2),
               (!cast<Instruction>("V"#NAME#Suffix##m_Int)
                     (vt (IMPLICIT_DEF)), addr:$src2)>;
     def : Pat<(ScalarVT (OpNode (load addr:$src))),
@@ -3213,10 +3215,11 @@ let Predicates = [HasAVX, NoVLX] in {
 multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, Predicate AVXTarget> {
   defm SS        :  sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
+                      ssmem, sse_load_f32,
                       !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
                       SSEPackedSingle, itins, UseSSE1, "SS">, XS;
   defm V#NAME#SS  : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
-                      f32mem,
+                      f32mem, ssmem, sse_load_f32,
                       !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
                       SSEPackedSingle, itins, AVXTarget, "SS">, XS, VEX_4V,
                       VEX_LIG, VEX_WIG, NotMemoryFoldable;
@@ -3225,10 +3228,11 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
 multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, Predicate AVXTarget> {
   defm SD        :  sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
+                      sdmem, sse_load_f64,
                       !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
                       OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD;
   defm V#NAME#SD  : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
-                      f64mem,
+                      f64mem, sdmem, sse_load_f64,
                       !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
                       OpNode, SSEPackedDouble, itins, AVXTarget, "SD">,
                       XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
```

