Late evaluation of the fast-math vectorization requirement.

This patch moves the verification of fast-math to just before vectorization is done. This way we can tell clang to append the command-line options that would allow floating-point commutativity. Specifically, those are enabling fast-math or specifying a loop hint. llvm-svn: 244489
author: Tyler Nowicki <tyler.nowicki@gmail.com> 2015-08-10 19:51:46 +0000
committer: Tyler Nowicki <tyler.nowicki@gmail.com> 2015-08-10 19:51:46 +0000
commit: c1a86f586617f8127eaf5024d27238a92a735db1 (patch)
tree: da915860da5199e699a372a22cefd4a4bd1b476a /llvm/lib/Transforms
parent: c2e3ba48e315312560a2fb26cbb8e8045ab9991e (diff)
download: bcm5719-llvm-c1a86f586617f8127eaf5024d27238a92a735db1.tar.gz
bcm5719-llvm-c1a86f586617f8127eaf5024d27238a92a735db1.zip
2 files changed, 70 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 5cbde94a98e..dae19d23db6 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -201,7 +201,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
 
   // Save the description of this reduction variable.
   RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind,
-                          ReduxDesc.getMinMaxKind());
+                          ReduxDesc.getMinMaxKind(),
+                          ReduxDesc.getUnsafeAlgebraInst());
 
   RedDes = RD;
 
@@ -263,7 +264,10 @@ RecurrenceDescriptor::InstDesc
 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
                                         InstDesc &Prev, bool HasFunNoNaNAttr) {
   bool FP = I->getType()->isFloatingPointTy();
-  bool FastMath = FP && I->hasUnsafeAlgebra();
+  Instruction *UAI = Prev.getUnsafeAlgebraInst();
+  if (!UAI && FP && !I->hasUnsafeAlgebra())
+    UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
+
   switch (I->getOpcode()) {
   default:
     return InstDesc(false, I);
@@ -284,10 +288,10 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
   case Instruction::Xor:
     return InstDesc(Kind == RK_IntegerXor, I);
   case Instruction::FMul:
-    return InstDesc(Kind == RK_FloatMult && FastMath, I);
+    return InstDesc(Kind == RK_FloatMult, I, UAI);
   case Instruction::FSub:
   case Instruction::FAdd:
-    return InstDesc(Kind == RK_FloatAdd && FastMath, I);
+    return InstDesc(Kind == RK_FloatAdd, I, UAI);
   case Instruction::FCmp:
   case Instruction::ICmp:
   case Instruction::Select:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 47e7436cf6d..9cfd5feca2d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -220,6 +220,7 @@ namespace {
 class LoopVectorizationLegality;
 class LoopVectorizationCostModel;
 class LoopVectorizeHints;
+class LoopVectorizationRequirements;
 
 /// \brief This modifies LoopAccessReport to initialize message with
 /// loop-vectorizer-specific part.
@@ -796,10 +797,12 @@ public:
   LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
                             TargetLibraryInfo *TLI, AliasAnalysis *AA,
                             Function *F, const TargetTransformInfo *TTI,
-                            LoopAccessAnalysis *LAA)
+                            LoopAccessAnalysis *LAA,
+                            LoopVectorizationRequirements *R)
       : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
         TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT),
-        Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
+        Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
+        Requirements(R) {}
 
   /// This enum represents the kinds of inductions that we support.
   enum InductionKind {
@@ -1065,6 +1068,9 @@ private:
   /// Can we assume the absence of NaNs.
   bool HasFunNoNaNAttr;
 
+  /// Vectorization requirements that will go through late-evaluation.
+  LoopVectorizationRequirements *Requirements;
+
   ValueToValueMap Strides;
   SmallPtrSet<Value *, 8> StrideSet;
 
@@ -1415,6 +1421,47 @@ static void emitMissedWarning(Function *F, Loop *L,
   }
 }
 
+/// \brief This holds vectorization requirements that must be verified late in
+/// the process. The requirements are set by legalize and costmodel. Once
+/// vectorization has been determined to be possible and profitable, the
+/// requirements can be verified by looking for metadata or compiler options.
+/// For example, some loops require FP commutativity, which is only allowed if
+/// vectorization is explicitly specified or if the fast-math compiler option
+/// has been provided.
+/// Late evaluation of these requirements allows helpful diagnostics to be
+/// composed that tell the user what needs to be done to vectorize the loop. For
+/// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
+/// evaluation should be used only when diagnostics can be generated that can be
+/// followed by a non-expert user.
+class LoopVectorizationRequirements {
+public:
+  LoopVectorizationRequirements() : UnsafeAlgebraInst(nullptr) {}
+
+  void addUnsafeAlgebraInst(Instruction *I) {
+    // Keep only the first instruction recorded; later calls are ignored.
+    if (!UnsafeAlgebraInst)
+      UnsafeAlgebraInst = I;
+  }
+
+  bool doesNotMeet(Function *F, const LoopVectorizeHints &Hints) {
+    if (UnsafeAlgebraInst &&
+        Hints.getForce() == LoopVectorizeHints::FK_Undefined &&
+        Hints.getWidth() == 0) {
+      emitOptimizationRemarkAnalysisFPCommute(
+          F->getContext(), DEBUG_TYPE, *F, UnsafeAlgebraInst->getDebugLoc(),
+          VectorizationReport() << "vectorization requires changes in the "
+                                   "order of operations, however IEEE 754 "
+                                   "floating-point operations are not "
+                                   "commutative");
+      return true; // Requirement not met; the remark above was emitted.
+    }
+    return false; // Nothing recorded, or the hints set a force/width value.
+  }
+
+private:
+  Instruction *UnsafeAlgebraInst; // First recorded instruction, else nullptr.
+};
+
 static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
   if (L.empty())
     return V.push_back(&L);
@@ -1609,7 +1656,9 @@ struct LoopVectorize : public FunctionPass {
     }
 
     // Check if it is legal to vectorize the loop.
-    LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA);
+    LoopVectorizationRequirements Requirements;
+    LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA,
+                                  &Requirements);
     if (!LVL.canVectorize()) {
       DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
       emitMissedWarning(F, L, Hints);
@@ -1665,6 +1714,13 @@ struct LoopVectorize : public FunctionPass {
     std::string VecDiagMsg, IntDiagMsg;
     bool VectorizeLoop = true, InterleaveLoop = true;
 
+    if (Requirements.doesNotMeet(F, Hints)) {
+      DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
+                      "requirements.\n");
+      emitMissedWarning(F, L, Hints);
+      return false;
+    }
+
     if (VF.Width == 1) {
       DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
       VecDiagMsg =
@@ -4079,6 +4135,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
 
         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop,
                                                  Reductions[Phi])) {
+          if (Reductions[Phi].hasUnsafeAlgebra())
+            Requirements->addUnsafeAlgebraInst(
+                Reductions[Phi].getUnsafeAlgebraInst());
           AllowedExit.insert(Reductions[Phi].getLoopExitInstr());
           continue;
         }
author	Tyler Nowicki <tyler.nowicki@gmail.com>	2015-08-10 19:51:46 +0000
committer	Tyler Nowicki <tyler.nowicki@gmail.com>	2015-08-10 19:51:46 +0000
commit	c1a86f586617f8127eaf5024d27238a92a735db1 (patch)
tree	da915860da5199e699a372a22cefd4a4bd1b476a /llvm/lib/Transforms
parent	c2e3ba48e315312560a2fb26cbb8e8045ab9991e (diff)
download	bcm5719-llvm-c1a86f586617f8127eaf5024d27238a92a735db1.tar.gz bcm5719-llvm-c1a86f586617f8127eaf5024d27238a92a735db1.zip