summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-05-14 20:52:11 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-05-14 20:52:11 +0000
commitd0ef9d8e93f0565a23862253d9913d590c0e4197 (patch)
tree34b676b328eacc3d532d8810bedb93f51dd1f78b /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parentf96b4ab92d6a95a0720d18b1cd948a95d0cb54e7 (diff)
downloadbcm5719-llvm-d0ef9d8e93f0565a23862253d9913d590c0e4197.tar.gz
bcm5719-llvm-d0ef9d8e93f0565a23862253d9913d590c0e4197.zip
[X86][AVX1] Account for cost of extract/insert of 256-bit shifts
llvm-svn: 303023
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp98
1 files changed, 49 insertions, 49 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 4a5c7e395fd..80e18161a94 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -589,28 +589,28 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
static const CostTblEntry SSE41CostTable[] = {
- { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence.
- { ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence.
- { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence.
- { ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence.
- { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld
- { ISD::SHL, MVT::v8i32, 2*4 }, // pslld/paddd/cvttps2dq/pmulld
-
- { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence.
- { ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence.
- { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence.
- { ISD::SRL, MVT::v16i16, 2*14 }, // pblendvb sequence.
- { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend.
- { ISD::SRL, MVT::v8i32, 2*11 }, // Shift each lane + blend.
-
- { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence.
- { ISD::SRA, MVT::v32i8, 2*24 }, // pblendvb sequence.
- { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence.
- { ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence.
- { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend.
- { ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend.
-
- { ISD::MUL, MVT::v4i32, 1 } // pmulld
+ { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence.
+ { ISD::SHL, MVT::v32i8, 2*11+2 }, // pblendvb sequence + split.
+ { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SHL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split.
+ { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld
+ { ISD::SHL, MVT::v8i32, 2*4+2 }, // pslld/paddd/cvttps2dq/pmulld + split
+
+ { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v32i8, 2*12+2 }, // pblendvb sequence + split.
+ { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split.
+ { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend.
+ { ISD::SRL, MVT::v8i32, 2*11+2 }, // Shift each lane + blend + split.
+
+ { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v32i8, 2*24+2 }, // pblendvb sequence + split.
+ { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split.
+ { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend.
+ { ISD::SRA, MVT::v8i32, 2*12+2 }, // Shift each lane + blend + split.
+
+ { ISD::MUL, MVT::v4i32, 1 } // pmulld
};
if (ST->hasSSE41())
@@ -620,33 +620,33 @@ int X86TTIImpl::getArithmeticInstrCost(
static const CostTblEntry SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
- { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
- { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
- { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence.
- { ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
-
- { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence.
- { ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
-
- { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
- { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence.
- { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
-
- { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
- { ISD::MUL, MVT::v8i16, 1 }, // pmullw
- { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
- { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
-
- { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/
- { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
- { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/
- { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/
+ { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
+ { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
+ { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
+ { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence.
+ { ISD::SHL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split.
+
+ { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
+ { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
+ { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
+ { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence.
+ { ISD::SRL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split.
+
+ { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
+ { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
+ { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
+ { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence.
+ { ISD::SRA, MVT::v4i64, 2*12+2 }, // srl/xor/sub sequence+split.
+
+ { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v8i16, 1 }, // pmullw
+ { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
+ { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
+
+ { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/
// It is not a good idea to vectorize division. We have to scalarize it and
// in the process we will often end up having to spilling regular
OpenPOWER on IntegriCloud