summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2017-11-24 19:57:48 +0000
committer: Craig Topper <craig.topper@intel.com> 2017-11-24 19:57:48 +0000
commit: 13ed01e6355bced6736f39e2910ba98f0293c328 (patch)
tree: 5c93ff990fe854f6cee3edab4176bbb2d682d8fb /llvm/lib
parent: 8375bec71ef0dad418dc01706d490300f35b1e74 (diff)
downloadbcm5719-llvm-13ed01e6355bced6736f39e2910ba98f0293c328.tar.gz
bcm5719-llvm-13ed01e6355bced6736f39e2910ba98f0293c328.zip
[X86] Prevent using X * rsqrt(X) to approximate sqrt when only sse1 is enabled.
This optimization can occur after type legalization and emit a vselect with v4i32 type, but that type is not legal when only sse1 is enabled. The vselect ultimately gets scalarized by the second type legalization that runs after vector op legalization, but that pass is really intended to handle the scalar types that might be introduced by legalizing vector ops. For now, just stop this from happening by disabling the optimization when only sse1 is enabled.

llvm-svn: 318965
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 5
1 files changed, 4 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c0d850429e1..78b18511bf5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17187,8 +17187,11 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
+ // TODO: SQRT requires SSE2 to prevent the introduction of an illegal v4i32
+ // after legalize types.
if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v4f32 && Subtarget.hasSSE1() && Reciprocal) ||
+ (VT == MVT::v4f32 && Subtarget.hasSSE2() && !Reciprocal) ||
(VT == MVT::v8f32 && Subtarget.hasAVX())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 1;
OpenPOWER on IntegriCloud