[LoopVectorizer] give more advice in remark about failure to vectorize call

Something like this is requested by https://bugs.llvm.org/show_bug.cgi?id=40265, and it seems like a common enough case that we should acknowledge it. Differential Revision: https://reviews.llvm.org/D56551 llvm-svn: 351010
author: Sanjay Patel <spatel@rotateright.com> 2019-01-12 15:27:15 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2019-01-12 15:27:15 +0000
commit: 7d65fe5cd55183dfed63a27f0f20e33e4b401e5b (patch)
tree: 7df426f87f630d41b316bbe1e3dcb47a3a93cf73 /llvm
parent: 1b4a240bfe2f548f29beec177fdc2792290872c5 (diff)
download: bcm5719-llvm-7d65fe5cd55183dfed63a27f0f20e33e4b401e5b.tar.gz
bcm5719-llvm-7d65fe5cd55183dfed63a27f0f20e33e4b401e5b.zip
2 files changed, 75 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index ac989dd66f7..b44fe5a52a2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -714,10 +714,30 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           !isa<DbgInfoIntrinsic>(CI) &&
           !(CI->getCalledFunction() && TLI &&
             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
-        ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
-                  << "call instruction cannot be vectorized");
+        // If the call is a recognized math library call, it is likely that
+        // we can vectorize it given loosened floating-point constraints.
+        LibFunc Func;
+        bool IsMathLibCall =
+            TLI && CI->getCalledFunction() &&
+            CI->getType()->isFloatingPointTy() &&
+            TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
+            TLI->hasOptimizedCodeGen(Func);
+
+        if (IsMathLibCall) {
+          // TODO: Ideally, we should not use clang-specific language here,
+          // but it's hard to provide meaningful yet generic advice.
+          // Also, should this be guarded by allowExtraAnalysis() and/or be part
+          // of the returned info from isFunctionVectorizable()?
+          ORE->emit(createMissedAnalysis("CantVectorizeLibcall", CI)
+              << "library call cannot be vectorized. "
+                 "Try compiling with -fno-math-errno, -ffast-math, "
+                 "or similar flags");
+        } else {
+          ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
+                    << "call instruction cannot be vectorized");
+        }
         LLVM_DEBUG(
-            dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
+            dbgs() << "LV: Found a non-intrinsic callsite.\n");
         return false;
       }
 
diff --git a/llvm/test/Transforms/LoopVectorize/libcall-remark.ll b/llvm/test/Transforms/LoopVectorize/libcall-remark.ll
new file mode 100644
index 00000000000..a1a7e461c70
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/libcall-remark.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -loop-vectorize < %s 2>&1 -pass-remarks-analysis=.* | FileCheck %s
+
+; Test the optimization remark emitter for recognition 
+; of a mathlib function vs. an arbitrary function.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+@data = external local_unnamed_addr global [32768 x float], align 16
+
+; CHECK: loop not vectorized: library call cannot be vectorized
+
+define void @libcall_blocks_vectorization() {
+entry:
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [32768 x float], [32768 x float]* @data, i64 0, i64 %indvars.iv
+  %t0 = load float, float* %arrayidx, align 4
+  %sqrtf = tail call float @sqrtf(float %t0)
+  store float %sqrtf, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 32768
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK: loop not vectorized: call instruction cannot be vectorized
+
+define void @arbitrary_call_blocks_vectorization() {
+entry:
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [32768 x float], [32768 x float]* @data, i64 0, i64 %indvars.iv
+  %t0 = load float, float* %arrayidx, align 4
+  %sqrtf = tail call float @arbitrary(float %t0)
+  store float %sqrtf, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 32768
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+declare float @sqrtf(float)
+declare float @arbitrary(float)
+
author	Sanjay Patel <spatel@rotateright.com>	2019-01-12 15:27:15 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2019-01-12 15:27:15 +0000
commit	7d65fe5cd55183dfed63a27f0f20e33e4b401e5b (patch)
tree	7df426f87f630d41b316bbe1e3dcb47a3a93cf73 /llvm
parent	1b4a240bfe2f548f29beec177fdc2792290872c5 (diff)
download	bcm5719-llvm-7d65fe5cd55183dfed63a27f0f20e33e4b401e5b.tar.gz bcm5719-llvm-7d65fe5cd55183dfed63a27f0f20e33e4b401e5b.zip