[ARM] Adding IEEE-754 SIMD detection to loop vectorizer

Some SIMD implementations are not IEEE-754 compliant, for example ARM's NEON. This patch teaches the loop vectorizer to only allow transformations of loops that either contain no floating-point operations or have enough allowance flags supporting lack of precision (ex. -ffast-math, Darwin). For that, the target description now has a method which tells us if the vectorizer is allowed to handle FP math without falling into unsafe representations, plus a check on every FP instruction in the candidate loop to check for the safety flags. This commit makes LLVM behave like GCC with respect to ARM NEON support, but it stops short of fixing the underlying problem: sub-normals. Neither GCC nor LLVM have a flag for allowing sub-normal operations. Before this patch, GCC only allows it using unsafe-math flags and LLVM allows it by default with no way to turn it off (short of not using NEON at all). As a first step, we push this change to make it safe and in sync with GCC. The second step is to discuss a new sub-normal's flag on both communitues and come up with a common solution. The third step is to improve the FastMath flags in LLVM to encode sub-normals and use those flags to restrict NEON FP. Fixes PR16275. llvm-svn: 266363
author: Renato Golin <renato.golin@linaro.org> 2016-04-14 20:42:18 +0000
committer: Renato Golin <renato.golin@linaro.org> 2016-04-14 20:42:18 +0000
commit: 5cb666add7d47962c82db4da224a7978fb69f403 (patch)
tree: 2d94cd96392937da81a6b9708c399ac1d8b5c8a8 /llvm/lib
parent: e998b91d865e6d0db389b026d7b85f65ef5a2cad (diff)
download: bcm5719-llvm-5cb666add7d47962c82db4da224a7978fb69f403.tar.gz
bcm5719-llvm-5cb666add7d47962c82db4da224a7978fb69f403.zip
3 files changed, 56 insertions, 6 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 48e441bac69..9b23d7ca932 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -172,6 +172,10 @@ bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
   return TTIImpl->enableInterleavedAccessVectorization();
 }
 
+bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
+  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
+}
+
 TargetTransformInfo::PopcntSupportKind
 TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
   return TTIImpl->getPopcntSupport(IntTyWidthInBit);
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 7808587c205..0fe964f36de 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -54,6 +54,10 @@ public:
 
   bool enableInterleavedAccessVectorization() { return true; }
 
+  bool isFPVectorizationPotentiallyUnsafe() {
+    return !ST->hasFPARMv8() && !ST->isTargetDarwin();
+  }
+
   /// \name Scalar TTI Implementations
   /// @{
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2c5fec64c18..8072d06c34b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -963,6 +963,9 @@ class LoopVectorizeHints {
   /// Return the loop metadata prefix.
   static StringRef Prefix() { return "llvm.loop."; }
 
+  /// True if there is any unsafe math in the loop.
+  bool PotentiallyUnsafe;
+
 public:
   enum ForceKind {
     FK_Undefined = -1, ///< Not selected.
@@ -975,7 +978,7 @@ public:
               HK_WIDTH),
         Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
         Force("vectorize.enable", FK_Undefined, HK_FORCE),
-        TheLoop(L) {
+        PotentiallyUnsafe(false), TheLoop(L) {
     // Populate values with existing loop metadata.
     getHintsFromMetadata();
 
@@ -1073,6 +1076,19 @@ public:
     return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
   }
 
+  bool isPotentiallyUnsafe() const {
+    // Avoid FP vectorization if the target is unsure about proper support.
+    // This may be related to the SIMD unit in the target not handling
+    // IEEE 754 FP ops properly, or bad single-to-double promotions.
+    // Otherwise, a sequence of vectorized loops, even without reduction,
+    // could lead to different end results on the destination vectors.
+    return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
+  }
+
+  void setPotentiallyUnsafe() {
+    PotentiallyUnsafe = true;
+  }
+
 private:
   /// Find hints specified in the loop metadata and update local values.
   void getHintsFromMetadata() {
@@ -1234,7 +1250,7 @@ public:
                             const TargetTransformInfo *TTI,
                             LoopAccessAnalysis *LAA,
                             LoopVectorizationRequirements *R,
-                            const LoopVectorizeHints *H)
+                            LoopVectorizeHints *H)
       : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TheFunction(F),
         TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(PSE, L, DT),
         Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
@@ -1460,7 +1476,7 @@ private:
   LoopVectorizationRequirements *Requirements;
 
   /// Used to emit an analysis of any legality issues.
-  const LoopVectorizeHints *Hints;
+  LoopVectorizeHints *Hints;
 
   ValueToValueMap Strides;
   SmallPtrSet<Value *, 8> StrideSet;
@@ -1884,6 +1900,21 @@ struct LoopVectorize : public FunctionPass {
       return false;
     }
 
+    // Check if the target supports potentially unsafe FP vectorization.
+    // FIXME: Add a check for the type of safety issue (denormal, signaling)
+    // for the target we're vectorizing for, to make sure none of the
+    // additional fp-math flags can help.
+    if (Hints.isPotentiallyUnsafe() &&
+        TTI->isFPVectorizationPotentiallyUnsafe()) {
+      DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n");
+      emitAnalysisDiag(
+          F, L, Hints,
+          VectorizationReport()
+             << "loop not vectorized due to unsafe FP support.");
+      emitMissedWarning(F, L, Hints);
+      return false;
+    }
+
     // Select the optimal vectorization factor.
     const LoopVectorizationCostModel::VectorizationFactor VF =
         CM.selectVectorizationFactor(OptForSize);
@@ -4695,12 +4726,23 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         }
         if (EnableMemAccessVersioning)
           collectStridedAccess(ST);
-      }
 
-      if (EnableMemAccessVersioning)
-        if (LoadInst *LI = dyn_cast<LoadInst>(it))
+      } else if (LoadInst *LI = dyn_cast<LoadInst>(it)) {
+        if (EnableMemAccessVersioning)
           collectStridedAccess(LI);
 
+      // FP instructions can allow unsafe algebra, thus vectorizable by
+      // non-IEEE-754 compliant SIMD units.
+      // This applies to floating-point math operations and calls, not memory
+      // operations, shuffles, or casts, as they don't change precision or
+      // semantics.
+      } else if (it->getType()->isFloatingPointTy() &&
+                (CI || it->isBinaryOp()) &&
+                !it->hasUnsafeAlgebra()) {
+        DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
+        Hints->setPotentiallyUnsafe();
+      }
+
       // Reduction instructions are allowed to have exit users.
       // All other instructions must not have external users.
       if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) {
author	Renato Golin <renato.golin@linaro.org>	2016-04-14 20:42:18 +0000
committer	Renato Golin <renato.golin@linaro.org>	2016-04-14 20:42:18 +0000
commit	5cb666add7d47962c82db4da224a7978fb69f403 (patch)
tree	2d94cd96392937da81a6b9708c399ac1d8b5c8a8 /llvm/lib
parent	e998b91d865e6d0db389b026d7b85f65ef5a2cad (diff)
download	bcm5719-llvm-5cb666add7d47962c82db4da224a7978fb69f403.tar.gz bcm5719-llvm-5cb666add7d47962c82db4da224a7978fb69f403.zip