[X86] Adjust cost of FP_TO_UINT v8f32->v8i32

There is no direct AVX instruction to convert to unsigned. I have some ideas for how we may be able to do this with three vector instructions, but the current backend just bails on this to get it scalarized. See the comment for why we need to adjust the cost returned by BasicTTI. The test is a bit roundabout (and checks assembly rather than bitcode) because I'd like it to work even if at some point we could vectorize this conversion. Fixes <rdar://problem/16371920>. llvm-svn: 205159
author: Adam Nemet <anemet@apple.com> 2014-03-30 18:07:13 +0000
committer: Adam Nemet <anemet@apple.com> 2014-03-30 18:07:13 +0000
commit: 6dafe97271c871f1568f6c98c8daacbd09ecbae9 (patch)
tree: 4d151d28833fcd8f2720f6ca299d417675ebe5f8 /llvm/test/Transforms/LoopVectorize
parent: 65f582f432729398265e53d04560e5007cf4ca8a (diff)
download: bcm5719-llvm-6dafe97271c871f1568f6c98c8daacbd09ecbae9.tar.gz
bcm5719-llvm-6dafe97271c871f1568f6c98c8daacbd09ecbae9.zip
1 files changed, 39 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
new file mode 100644
index 00000000000..529ed883c3b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -mcpu=core-avx2 -loop-vectorize -S | llc -mcpu=core-avx2 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+@float_array = common global [10000 x float] zeroinitializer, align 16  ; source buffer: @convert loads floats from here
+@unsigned_array = common global [10000 x i32] zeroinitializer, align 16  ; destination buffer: @convert stores the fptoui results here
+
+; If we need to scalarize the fptoui and then use inserts to build up the
+; vector again, then there is certainly no value in going 256-bit wide.
+; CHECK-NOT: vinserti128
+
+define void @convert(i32 %N) {  ; for each i from 0 up to N-1: unsigned_array[i] = fptoui(float_array[i])
+entry:
+  %0 = icmp eq i32 %N, 0  ; N == 0 means no iterations: branch straight to for.end
+  br i1 %0, label %for.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]  ; i64 induction variable, 0 on loop entry
+  %arrayidx = getelementptr inbounds [10000 x float]* @float_array, i64 0, i64 %indvars.iv  ; &float_array[i]
+  %1 = load float* %arrayidx, align 4
+  %conv = fptoui float %1 to i32  ; the float -> unsigned i32 conversion this test is about
+  %arrayidx2 = getelementptr inbounds [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv  ; &unsigned_array[i]
+  store i32 %conv, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1  ; i + 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow to i32 so it can be compared against %N
+  %exitcond = icmp eq i32 %lftr.wideiv, %N  ; leave the loop once i + 1 == N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
+
author	Adam Nemet <anemet@apple.com>	2014-03-30 18:07:13 +0000
committer	Adam Nemet <anemet@apple.com>	2014-03-30 18:07:13 +0000
commit	6dafe97271c871f1568f6c98c8daacbd09ecbae9 (patch)
tree	4d151d28833fcd8f2720f6ca299d417675ebe5f8 /llvm/test/Transforms/LoopVectorize
parent	65f582f432729398265e53d04560e5007cf4ca8a (diff)
download	bcm5719-llvm-6dafe97271c871f1568f6c98c8daacbd09ecbae9.tar.gz bcm5719-llvm-6dafe97271c871f1568f6c98c8daacbd09ecbae9.zip