diff options
| author | Preston Gurd <preston.gurd@intel.com> | 2013-05-07 19:57:34 +0000 |
|---|---|---|
| committer | Preston Gurd <preston.gurd@intel.com> | 2013-05-07 19:57:34 +0000 |
| commit | 9264c954003caea18ed08e09cd1f1b712aedde8a (patch) | |
| tree | 2a746015418cc6d238ea7e9240cb8960d917741d /llvm/lib | |
| parent | c64c029f85af1087ca1ccd6eb8190538207dccce (diff) | |
| download | bcm5719-llvm-9264c954003caea18ed08e09cd1f1b712aedde8a.tar.gz bcm5719-llvm-9264c954003caea18ed08e09cd1f1b712aedde8a.zip | |
Corrected Atom latencies for SSE SQRT instructions.
llvm-svn: 181346
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 30 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 13 |
3 files changed, 36 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index cce938baafe..dcfaaf950f7 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3049,12 +3049,20 @@ let isCodeGenOnly = 1 in { /// And, we have a special variant form for a full-vector intrinsic form. let Sched = WriteFSqrt in { -def SSE_SQRTP : OpndItins< - IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM +def SSE_SQRTPS : OpndItins< + IIC_SSE_SQRTPS_RR, IIC_SSE_SQRTPS_RM >; -def SSE_SQRTS : OpndItins< - IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM +def SSE_SQRTSS : OpndItins< + IIC_SSE_SQRTSS_RR, IIC_SSE_SQRTSS_RM +>; + +def SSE_SQRTPD : OpndItins< + IIC_SSE_SQRTPD_RR, IIC_SSE_SQRTPD_RM +>; + +def SSE_SQRTSD : OpndItins< + IIC_SSE_SQRTSD_RR, IIC_SSE_SQRTSD_RM >; } @@ -3319,18 +3327,18 @@ let Predicates = [HasAVX] in { // Square root. defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, - SSE_SQRTS>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, + SSE_SQRTSS>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, - SSE_SQRTS>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; + SSE_SQRTSD>, + sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, +defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTSS>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTPS>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, - int_x86_avx_rsqrt_ps_256, SSE_SQRTP>; + int_x86_avx_rsqrt_ps_256, SSE_SQRTPS>; defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>, sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 9fbde88b710..9f2c7810fa5 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -266,10 +266,14 @@ def IIC_SSE_PINSRW : InstrItinClass; def IIC_SSE_PABS_RR : InstrItinClass; def IIC_SSE_PABS_RM : InstrItinClass; -def IIC_SSE_SQRTP_RR : InstrItinClass; -def IIC_SSE_SQRTP_RM : InstrItinClass; -def IIC_SSE_SQRTS_RR : InstrItinClass; -def IIC_SSE_SQRTS_RM : InstrItinClass; +def IIC_SSE_SQRTPS_RR : InstrItinClass; +def IIC_SSE_SQRTPS_RM : InstrItinClass; +def IIC_SSE_SQRTSS_RR : InstrItinClass; +def IIC_SSE_SQRTSS_RM : InstrItinClass; +def IIC_SSE_SQRTPD_RR : InstrItinClass; +def IIC_SSE_SQRTPD_RM : InstrItinClass; +def IIC_SSE_SQRTSD_RR : InstrItinClass; +def IIC_SSE_SQRTSD_RM : InstrItinClass; def IIC_SSE_RCPP_RR : InstrItinClass; def IIC_SSE_RCPP_RM : InstrItinClass; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index cce8f1b1143..cb0960aad13 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -211,10 +211,15 @@ def AtomItineraries : ProcessorItineraries< InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >, - InstrItinData<IIC_SSE_SQRTP_RR, [InstrStage<13, [Port0, Port1]>] >, - InstrItinData<IIC_SSE_SQRTP_RM, [InstrStage<14, [Port0, Port1]>] >, - InstrItinData<IIC_SSE_SQRTS_RR, [InstrStage<11, [Port0, Port1]>] >, - InstrItinData<IIC_SSE_SQRTS_RM, [InstrStage<12, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<70, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_SQRTPS_RM, [InstrStage<70, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_SQRTSS_RR, [InstrStage<34, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_SQRTSS_RM, [InstrStage<34, [Port0, Port1]>] >, + + InstrItinData<IIC_SSE_SQRTPD_RR, [InstrStage<125, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_SQRTPD_RM, [InstrStage<125, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_SQRTSD_RR, [InstrStage<62, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_SQRTSD_RM, [InstrStage<62, [Port0, Port1]>] >, InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [Port0, Port1]>] >, InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<10, [Port0, Port1]>] >, |

