Allow AVX vrsqrtps generation.

This is a follow-on to r220570 that allows a 256-bit (v8f32) version of vrsqrtps to be generated.

llvm-svn: 220579
author: Sanjay Patel <spatel@rotateright.com> 2014-10-24 17:59:18 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2014-10-24 17:59:18 +0000
commit: f924e11967750e5e269e063a22fc495799cfab6d (patch)
tree: 7e3da98cbaa21c1cd6b34d56dd64145f04c0d4b3 /llvm/lib
parent: 80e5b1ebd1f25159f01f55f20e2992c316d3985a (diff)
download: bcm5719-llvm-f924e11967750e5e269e063a22fc495799cfab6d.tar.gz
bcm5719-llvm-f924e11967750e5e269e063a22fc495799cfab6d.zip
1 file changed, 3 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b3541545403..26b1e6d51ea 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14383,13 +14383,14 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
   EVT VT = Op.getValueType();
   
   // SSE1 has rsqrtss and rsqrtps.
-  // TODO: Add support for AVX (v8f32) and AVX512 (v16f32).
+  // TODO: Add support for AVX512 (v16f32).
   // It is likely not profitable to do this for f64 because a double-precision
   // rsqrt estimate with refinement on x86 prior to FMA requires at least 16
   // instructions: convert to single, rsqrtss, convert back to double, refine
   // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
   // along with FMA, this could be a throughput win.
-  if (Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) {
+  if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+      (Subtarget->hasAVX() && VT == MVT::v8f32)) {
     RefinementSteps = 1;
     UseOneConstNR = false;
     return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
author	Sanjay Patel <spatel@rotateright.com>	2014-10-24 17:59:18 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2014-10-24 17:59:18 +0000
commit	f924e11967750e5e269e063a22fc495799cfab6d (patch)
tree	7e3da98cbaa21c1cd6b34d56dd64145f04c0d4b3 /llvm/lib
parent	80e5b1ebd1f25159f01f55f20e2992c316d3985a (diff)
download	bcm5719-llvm-f924e11967750e5e269e063a22fc495799cfab6d.tar.gz bcm5719-llvm-f924e11967750e5e269e063a22fc495799cfab6d.zip