summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2015-07-29 20:31:45 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2015-07-29 20:31:45 +0000
commit86478c6909ebd71667413fb27b483d3bc0bff3dc (patch)
tree610104db9faf7f3eaf6feb55aeefa88e36d42629 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent46682630f41e0ed95b7f5ea73f2445357b16c67b (diff)
downloadbcm5719-llvm-86478c6909ebd71667413fb27b483d3bc0bff3dc.tar.gz
bcm5719-llvm-86478c6909ebd71667413fb27b483d3bc0bff3dc.zip
[X86][SSE] Vectorize i64 ASHR operations
This patch vectorizes the v2i64/v4i64 ASHR shift operations - the last remaining integer vector shifts that are still being transferred to/from the scalar unit to be completed. Differential Revision: http://reviews.llvm.org/D11439 llvm-svn: 243569
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp5
1 files changed, 3 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index a2d0884c4d4..04c16f8dfc7 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -163,7 +163,8 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
- { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
+ { ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
+ { ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v32i8, 32*20 },
@@ -270,7 +271,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
{ ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
{ ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
+ { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence.
// It is not a good idea to vectorize division. We have to scalarize it and
// in the process we will often end up having to spilling regular
OpenPOWER on IntegriCloud