| author | Craig Topper <craig.topper@intel.com> | 2017-11-05 21:14:05 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-11-05 21:14:05 +0000 |
| commit | 4e2f53511ac8dd6b5e3b80648e3834a5022c0b60 (patch) | |
| tree | 4f414ecba2602d0f9b793b674ff000b2a6f41d88 /llvm/lib/Target | |
| parent | 948c39c480a8e7b94da99e6de323d16d30b3c7bd (diff) | |
[X86] Remove some more RCP and RSQRT patterns from InstrAVX512.td that I missed in r317413.
llvm-svn: 317441
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 13 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 24 |
2 files changed, 12 insertions, 25 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 4b2b8c9fd7b..ae56349580a 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7641,19 +7641,6 @@ defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
 
-let Predicates = [HasAVX512] in {
-  def : Pat<(f32 (X86frsqrt FR32X:$src)),
-            (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>;
-  def : Pat<(f32 (X86frsqrt (load addr:$src))),
-            (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
-            Requires<[OptForSize]>;
-  def : Pat<(f32 (X86frcp FR32X:$src)),
-            (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
-  def : Pat<(f32 (X86frcp (load addr:$src))),
-            (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
-            Requires<[OptForSize]>;
-}
-
 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                   X86VectorVTInfo _> {
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 08d28a78bf0..4314506c34f 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3095,7 +3095,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           ValueType vt, ValueType ScalarVT,
                           X86MemOperand x86memop,
                           Intrinsic Intr, SDNode OpNode, Domain d,
-                          OpndItins itins, string Suffix> {
+                          OpndItins itins, Predicate target, string Suffix> {
   let hasSideEffects = 0 in {
   def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -3126,7 +3126,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
   // vrcpss mem, %xmm0, %xmm0
   // TODO: In theory, we could fold the load, and avoid the stall caused by
   // the partial register store, either in ExecutionDepsFix or with smarter RA.
-  let Predicates = [UseAVX] in {
+  let Predicates = [target] in {
    def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
                                 (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
   }
@@ -3140,7 +3140,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
              (!cast<Instruction>("V"#NAME#Suffix##m_Int)
                  (vt (IMPLICIT_DEF)), addr:$src2)>;
   }
-  let Predicates = [UseAVX, OptForSize] in {
+  let Predicates = [target, OptForSize] in {
     def : Pat<(ScalarVT (OpNode (load addr:$src))),
               (!cast<Instruction>("V"#NAME#Suffix##m)
                   (ScalarVT (IMPLICIT_DEF)), addr:$src)>;
@@ -3220,40 +3220,40 @@ let Predicates = [HasAVX, NoVLX] in {
 }
 
 multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                          OpndItins itins> {
+                          OpndItins itins, Predicate AVXTarget> {
   defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
                       !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
                       SSEPackedSingle, itins, UseSSE1, "SS">, XS;
   defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
                       f32mem,
                       !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
-                      SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG,
-                      NotMemoryFoldable;
+                      SSEPackedSingle, itins, AVXTarget, "SS">, XS, VEX_4V,
+                      VEX_LIG, VEX_WIG, NotMemoryFoldable;
 }
 
 multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                          OpndItins itins> {
+                          OpndItins itins, Predicate AVXTarget> {
   defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
                          !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
                          OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD;
   defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
                          f64mem,
                          !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
-                         OpNode, SSEPackedDouble, itins, "SD">,
+                         OpNode, SSEPackedDouble, itins, AVXTarget, "SD">,
                          XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
 }
 
 // Square root.
-defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
+defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS, UseAVX>,
             sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX, NoVLX]>,
-            sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>,
+            sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD, UseAVX>,
             sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
 
 // Reciprocal approximations. Note that these typically require refinement
 // in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>,
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS, HasAVX>,
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX]>;
-defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
+defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS, HasAVX>,
             sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX]>;
 
 // There is no f64 version of the reciprocal approximation instructions.
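
For readers following the X86InstrSSE.td half of the diff: the AVX scalar-unop multiclasses now take their guarding predicate as a template argument instead of hard-coding UseAVX, so SQRT keeps UseAVX while RSQRT and RCP pass HasAVX and therefore keep matching the VRSQRTSS/VRCPSS patterns even when AVX-512 is available, consistent with deleting the VRSQRT14SS/VRCP14SS scalar patterns above. The standalone TableGen sketch below illustrates only that parameterization; `Predicate`, `DemoPat`, and `demo_fp_unop_s` here are simplified stand-ins, not the real classes from the X86 backend or llvm/Target/Target.td.

```
// Minimal sketch: thread a predicate through a multiclass so each defm
// picks its own guard. Stand-in classes only, not the in-tree definitions.
class Predicate;
def UseAVX : Predicate;   // roughly: AVX enabled, AVX-512 not
def HasAVX : Predicate;   // roughly: AVX enabled (AVX-512 allowed)

class DemoPat {
  // Each record carries the predicate list that guards its selection.
  list<Predicate> Predicates = [];
}

multiclass demo_fp_unop_s<Predicate AVXTarget> {
  // The caller now chooses the guard instead of a hard-coded UseAVX.
  let Predicates = [AVXTarget] in
  def _pat : DemoPat;
}

defm SQRTSS  : demo_fp_unop_s<UseAVX>;  // AVX-512 supplies its own sqrt patterns
defm RSQRTSS : demo_fp_unop_s<HasAVX>;  // rsqrt/rcp stay on the AVX scalar forms
defm RCPSS   : demo_fp_unop_s<HasAVX>;
```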

