diff options
| author | Benjamin Kramer <benny.kra@googlemail.com> | 2016-03-31 10:42:40 +0000 |
|---|---|---|
| committer | Benjamin Kramer <benny.kra@googlemail.com> | 2016-03-31 10:42:40 +0000 |
| commit | cad9a8a6bb3f26f2d7dad6aeca83f2ce5ada0eef (patch) | |
| tree | 4548da56b3ade6ed52fd0a8ca3ecb22df8b58bc1 | |
| parent | 958b9e7e33b5db1acf3d5a026efe85abb6784ab7 (diff) | |
| download | bcm5719-llvm-cad9a8a6bb3f26f2d7dad6aeca83f2ce5ada0eef.tar.gz bcm5719-llvm-cad9a8a6bb3f26f2d7dad6aeca83f2ce5ada0eef.zip | |
[TTI] Let the cost model estimate ctpop costs based on legality
PPC has a vector popcount; this lets the vectorizer use the correct cost
for it. Tweak the X86 test to use an intrinsic that's actually scalarized (we
have a somewhat efficient lowering for vector popcount using SSE, and the
cost model now finds that).
llvm-svn: 265005
| -rw-r--r-- | llvm/include/llvm/CodeGen/BasicTTIImpl.h | 10 | ||||
| -rw-r--r-- | llvm/test/Analysis/CostModel/PowerPC/popcnt.ll | 11 | ||||
| -rw-r--r-- | llvm/test/Analysis/CostModel/X86/scalarize.ll | 16 |
3 files changed, 28 insertions, 9 deletions
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index db9960c1aa0..476f0d5fb53 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -621,6 +621,7 @@ public: unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys) { unsigned ISD = 0; + unsigned SingleCallCost = 10; // Library call cost. Make it expensive. switch (IID) { default: { // Assume that we need to scalarize this intrinsic. @@ -725,6 +726,13 @@ public: case Intrinsic::masked_load: return static_cast<T *>(this) ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); + case Intrinsic::ctpop: + ISD = ISD::CTPOP; + // In case of legalization use TCC_Expensive. This is cheaper than a + // library call but still not a cheap instruction. + SingleCallCost = TargetTransformInfo::TCC_Expensive; + break; + // FIXME: ctlz, cttz, ... } const TargetLoweringBase *TLI = getTLI(); @@ -785,7 +793,7 @@ public: } // This is going to be turned into a library call, make it expensive. - return 10; + return SingleCallCost; } /// \brief Compute a cost of the given call instruction. 
diff --git a/llvm/test/Analysis/CostModel/PowerPC/popcnt.ll b/llvm/test/Analysis/CostModel/PowerPC/popcnt.ll new file mode 100644 index 00000000000..4bd842db09b --- /dev/null +++ b/llvm/test/Analysis/CostModel/PowerPC/popcnt.ll @@ -0,0 +1,11 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define <4 x i32> @test1(<4 x i32> %arg) { + ; CHECK: cost of 1 {{.*}} call <4 x i32> @llvm.ctpop.v4i32 + %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %arg) + ret <4 x i32> %ctpop +} + +declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) diff --git a/llvm/test/Analysis/CostModel/X86/scalarize.ll b/llvm/test/Analysis/CostModel/X86/scalarize.ll index fc25fcbc563..f124de50699 100644 --- a/llvm/test/Analysis/CostModel/X86/scalarize.ll +++ b/llvm/test/Analysis/CostModel/X86/scalarize.ll @@ -13,8 +13,8 @@ declare %i4 @llvm.bswap.v4i32(%i4) declare %i8 @llvm.bswap.v2i64(%i8) -declare %i4 @llvm.ctpop.v4i32(%i4) -declare %i8 @llvm.ctpop.v2i64(%i8) +declare %i4 @llvm.cttz.v4i32(%i4) +declare %i8 @llvm.cttz.v2i64(%i8) ; CHECK32-LABEL: test_scalarized_intrinsics ; CHECK64-LABEL: test_scalarized_intrinsics @@ -28,12 +28,12 @@ define void @test_scalarized_intrinsics() { ; CHECK64: cost of 6 {{.*}}bswap.v2i64 %r3 = call %i8 @llvm.bswap.v2i64(%i8 undef) -; CHECK32: cost of 12 {{.*}}ctpop.v4i32 -; CHECK64: cost of 12 {{.*}}ctpop.v4i32 - %r4 = call %i4 @llvm.ctpop.v4i32(%i4 undef) -; CHECK32: cost of 10 {{.*}}ctpop.v2i64 -; CHECK64: cost of 6 {{.*}}ctpop.v2i64 - %r5 = call %i8 @llvm.ctpop.v2i64(%i8 undef) +; CHECK32: cost of 12 {{.*}}cttz.v4i32 +; CHECK64: cost of 12 {{.*}}cttz.v4i32 + %r4 = call %i4 @llvm.cttz.v4i32(%i4 undef) +; CHECK32: cost of 10 {{.*}}cttz.v2i64 +; CHECK64: cost of 6 {{.*}}cttz.v2i64 + %r5 = call %i8 @llvm.cttz.v2i64(%i8 undef) ; CHECK32: 
ret ; CHECK64: ret |

