summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/BasicTargetTransformInfo.cpp5
-rw-r--r--llvm/test/Analysis/CostModel/ARM/cast.ll32
-rw-r--r--llvm/test/Analysis/CostModel/X86/scalarize.ll41
3 files changed, 60 insertions, 18 deletions
diff --git a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
index e3ecf14d1f2..03c5eba3692 100644
--- a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -20,7 +20,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Target/TargetLowering.h"
#include <utility>
-
using namespace llvm;
namespace {
@@ -405,7 +404,9 @@ unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) const {
- return 1;
+ std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType());
+
+ return LT.first;
}
unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll
index a6ed798b95b..662110f2720 100644
--- a/llvm/test/Analysis/CostModel/ARM/cast.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cast.ll
@@ -221,9 +221,9 @@ define i32 @casts() {
%r96 = fptoui <2 x float> undef to <2 x i32>
; CHECK: cost of 1 {{.*}} fptosi
%r97 = fptosi <2 x float> undef to <2 x i32>
- ; CHECK: cost of 24 {{.*}} fptoui
+ ; CHECK: cost of 28 {{.*}} fptoui
%r98 = fptoui <2 x float> undef to <2 x i64>
- ; CHECK: cost of 24 {{.*}} fptosi
+ ; CHECK: cost of 28 {{.*}} fptosi
%r99 = fptosi <2 x float> undef to <2 x i64>
; CHECK: cost of 8 {{.*}} fptoui
@@ -242,9 +242,9 @@ define i32 @casts() {
%r106 = fptoui <2 x double> undef to <2 x i32>
; CHECK: cost of 2 {{.*}} fptosi
%r107 = fptosi <2 x double> undef to <2 x i32>
- ; CHECK: cost of 24 {{.*}} fptoui
+ ; CHECK: cost of 28 {{.*}} fptoui
%r108 = fptoui <2 x double> undef to <2 x i64>
- ; CHECK: cost of 24 {{.*}} fptosi
+ ; CHECK: cost of 28 {{.*}} fptosi
%r109 = fptosi <2 x double> undef to <2 x i64>
; CHECK: cost of 16 {{.*}} fptoui
@@ -263,9 +263,9 @@ define i32 @casts() {
%r116 = fptoui <4 x float> undef to <4 x i32>
; CHECK: cost of 1 {{.*}} fptosi
%r117 = fptosi <4 x float> undef to <4 x i32>
- ; CHECK: cost of 48 {{.*}} fptoui
+ ; CHECK: cost of 56 {{.*}} fptoui
%r118 = fptoui <4 x float> undef to <4 x i64>
- ; CHECK: cost of 48 {{.*}} fptosi
+ ; CHECK: cost of 56 {{.*}} fptosi
%r119 = fptosi <4 x float> undef to <4 x i64>
; CHECK: cost of 16 {{.*}} fptoui
@@ -284,9 +284,9 @@ define i32 @casts() {
%r126 = fptoui <4 x double> undef to <4 x i32>
; CHECK: cost of 16 {{.*}} fptosi
%r127 = fptosi <4 x double> undef to <4 x i32>
- ; CHECK: cost of 48 {{.*}} fptoui
+ ; CHECK: cost of 56 {{.*}} fptoui
%r128 = fptoui <4 x double> undef to <4 x i64>
- ; CHECK: cost of 48 {{.*}} fptosi
+ ; CHECK: cost of 56 {{.*}} fptosi
%r129 = fptosi <4 x double> undef to <4 x i64>
; CHECK: cost of 32 {{.*}} fptoui
@@ -305,9 +305,9 @@ define i32 @casts() {
%r136 = fptoui <8 x float> undef to <8 x i32>
; CHECK: cost of 2 {{.*}} fptosi
%r137 = fptosi <8 x float> undef to <8 x i32>
- ; CHECK: cost of 96 {{.*}} fptoui
+ ; CHECK: cost of 112 {{.*}} fptoui
%r138 = fptoui <8 x float> undef to <8 x i64>
- ; CHECK: cost of 96 {{.*}} fptosi
+ ; CHECK: cost of 112 {{.*}} fptosi
%r139 = fptosi <8 x float> undef to <8 x i64>
; CHECK: cost of 32 {{.*}} fptoui
@@ -326,9 +326,9 @@ define i32 @casts() {
%r146 = fptoui <8 x double> undef to <8 x i32>
; CHECK: cost of 32 {{.*}} fptosi
%r147 = fptosi <8 x double> undef to <8 x i32>
- ; CHECK: cost of 96 {{.*}} fptoui
+ ; CHECK: cost of 112 {{.*}} fptoui
%r148 = fptoui <8 x double> undef to <8 x i64>
- ; CHECK: cost of 96 {{.*}} fptosi
+ ; CHECK: cost of 112 {{.*}} fptosi
%r149 = fptosi <8 x double> undef to <8 x i64>
; CHECK: cost of 64 {{.*}} fptoui
@@ -347,9 +347,9 @@ define i32 @casts() {
%r156 = fptoui <16 x float> undef to <16 x i32>
; CHECK: cost of 4 {{.*}} fptosi
%r157 = fptosi <16 x float> undef to <16 x i32>
- ; CHECK: cost of 192 {{.*}} fptoui
+ ; CHECK: cost of 224 {{.*}} fptoui
%r158 = fptoui <16 x float> undef to <16 x i64>
- ; CHECK: cost of 192 {{.*}} fptosi
+ ; CHECK: cost of 224 {{.*}} fptosi
%r159 = fptosi <16 x float> undef to <16 x i64>
; CHECK: cost of 64 {{.*}} fptoui
@@ -368,9 +368,9 @@ define i32 @casts() {
%r166 = fptoui <16 x double> undef to <16 x i32>
; CHECK: cost of 64 {{.*}} fptosi
%r167 = fptosi <16 x double> undef to <16 x i32>
- ; CHECK: cost of 192 {{.*}} fptoui
+ ; CHECK: cost of 224 {{.*}} fptoui
%r168 = fptoui <16 x double> undef to <16 x i64>
- ; CHECK: cost of 192 {{.*}} fptosi
+ ; CHECK: cost of 224 {{.*}} fptosi
%r169 = fptosi <16 x double> undef to <16 x i64>
; CHECK: cost of 8 {{.*}} uitofp
diff --git a/llvm/test/Analysis/CostModel/X86/scalarize.ll b/llvm/test/Analysis/CostModel/X86/scalarize.ll
new file mode 100644
index 00000000000..fc25fcbc563
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/X86/scalarize.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=i386 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK32
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK64
+
+; Test vector scalarization costs.
+; RUN: llc < %s -march=x86 -mcpu=i386
+; RUN: llc < %s -march=x86 -mcpu=yonah
+
+%i4 = type <4 x i32>
+%i8 = type <2 x i64>
+
+;;; TEST HANDLING OF VARIOUS VECTOR SIZES
+
+declare %i4 @llvm.bswap.v4i32(%i4)
+declare %i8 @llvm.bswap.v2i64(%i8)
+
+declare %i4 @llvm.ctpop.v4i32(%i4)
+declare %i8 @llvm.ctpop.v2i64(%i8)
+
+; CHECK32-LABEL: test_scalarized_intrinsics
+; CHECK64-LABEL: test_scalarized_intrinsics
+define void @test_scalarized_intrinsics() {
+ %r1 = add %i8 undef, undef
+
+; CHECK32: cost of 12 {{.*}}bswap.v4i32
+; CHECK64: cost of 12 {{.*}}bswap.v4i32
+ %r2 = call %i4 @llvm.bswap.v4i32(%i4 undef)
+; CHECK32: cost of 10 {{.*}}bswap.v2i64
+; CHECK64: cost of 6 {{.*}}bswap.v2i64
+ %r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
+
+; CHECK32: cost of 12 {{.*}}ctpop.v4i32
+; CHECK64: cost of 12 {{.*}}ctpop.v4i32
+ %r4 = call %i4 @llvm.ctpop.v4i32(%i4 undef)
+; CHECK32: cost of 10 {{.*}}ctpop.v2i64
+; CHECK64: cost of 6 {{.*}}ctpop.v2i64
+ %r5 = call %i8 @llvm.ctpop.v2i64(%i8 undef)
+
+; CHECK32: ret
+; CHECK64: ret
+ ret void
+}
OpenPOWER on IntegriCloud