summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-11-13 05:25:24 +0000
committerCraig Topper <craig.topper@intel.com>2017-11-13 05:25:24 +0000
commitd4f6094091de107f9ce11d814d8e519f8cf8ac55 (patch)
tree2123c9023b8de3538baa2a597026a527f05f1e00 /llvm/lib/Target
parent24389c674666f1283f5f1821dae6187790f3a961 (diff)
downloadbcm5719-llvm-d4f6094091de107f9ce11d814d8e519f8cf8ac55.tar.gz
bcm5719-llvm-d4f6094091de107f9ce11d814d8e519f8cf8ac55.zip
[X86] Fix SQRTSS/SQRTSD/RCPSS/RCPSD intrinsics to use sse_load_f32/sse_load_f64 to increase load folding opportunities.
llvm-svn: 318016
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td7
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td16
2 files changed, 13 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 8d677be9ea2..12b37874485 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7588,11 +7588,10 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(_.VT _.RC:$src2),
(i32 FROUND_CURRENT))>;
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(X86fsqrtRnds (_.VT _.RC:$src1),
- (_.VT (scalar_to_vector
- (_.ScalarLdFrag addr:$src2))),
+ _.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>;
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -7630,7 +7629,7 @@ let Predicates = [HasAVX512, OptForSize] in {
(!cast<Instruction>(NAME#SUFF#Zm)
(_.EltVT (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(Intr (scalar_to_vector (_.EltVT (load addr:$src2)))),
+ def : Pat<(Intr _.ScalarIntMemCPat:$src2),
(!cast<Instruction>(NAME#SUFF#Zm_Int)
(_.VT (IMPLICIT_DEF)), addr:$src2)>;
}
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index cb512848e41..dc1eb3e8963 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3040,6 +3040,7 @@ def SSE_RCPS : OpndItins<
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType vt, ValueType ScalarVT,
X86MemOperand x86memop,
+ Operand intmemop, ComplexPattern int_cpat,
Intrinsic Intr,
SDNode OpNode, Domain d, OpndItins itins,
Predicate target, string Suffix> {
@@ -3060,7 +3061,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
let mayLoad = 1 in
- def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src2),
+ def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
@@ -3080,7 +3081,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
// which has a clobber before the rcp, vs.
// rcpss mem, %xmm0
let Predicates = [target, OptForSize] in {
- def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))),
+ def : Pat<(Intr int_cpat:$src2),
(!cast<Instruction>(NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), addr:$src2)>;
}
@@ -3089,6 +3090,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType vt, ValueType ScalarVT,
X86MemOperand x86memop,
+ Operand intmemop, ComplexPattern int_cpat,
Intrinsic Intr, SDNode OpNode, Domain d,
OpndItins itins, Predicate target, string Suffix> {
let hasSideEffects = 0 in {
@@ -3106,7 +3108,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
[]>, Sched<[itins.Sched.Folded]>;
let mayLoad = 1 in
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, x86memop:$src2),
+ (ins VR128:$src1, intmemop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
@@ -3129,7 +3131,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
VR128:$src)>;
}
let Predicates = [target, OptForSize] in {
- def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))),
+ def : Pat<(Intr int_cpat:$src2),
(!cast<Instruction>("V"#NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), addr:$src2)>;
def : Pat<(ScalarVT (OpNode (load addr:$src))),
@@ -3213,10 +3215,11 @@ let Predicates = [HasAVX, NoVLX] in {
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, Predicate AVXTarget> {
defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
+ ssmem, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
SSEPackedSingle, itins, UseSSE1, "SS">, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
- f32mem,
+ f32mem, ssmem, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
SSEPackedSingle, itins, AVXTarget, "SS">, XS, VEX_4V,
VEX_LIG, VEX_WIG, NotMemoryFoldable;
@@ -3225,10 +3228,11 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, Predicate AVXTarget> {
defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
+ sdmem, sse_load_f64,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
- f64mem,
+ f64mem, sdmem, sse_load_f64,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, AVXTarget, "SD">,
XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
OpenPOWER on IntegriCloud