summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2019-01-20 13:55:01 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2019-01-20 13:55:01 +0000
commit c934d3a01b05939af797b7fca07dd3ddaaa9d5fd (patch)
tree 2d531dae63a572e8f00a59755d076b4db3c573d9 /llvm/lib/Target/X86
parent 1231904c489437965e880baee60b459b621785f8 (diff)
downloadbcm5719-llvm-c934d3a01b05939af797b7fca07dd3ddaaa9d5fd.tar.gz
bcm5719-llvm-c934d3a01b05939af797b7fca07dd3ddaaa9d5fd.zip
[CostModel][X86] Add explicit vector select costs
Prior to SSE41 (and sometimes on AVX1), vector select has to be performed as a ((X & C)|(Y & ~C)) bit select. Exposes a couple of issues with the min/max reduction costs (which only go down to SSE42 for some reason). The increased pre-SSE41 selection costs also prevent a couple of tests from firing any longer, so I've either tweaked the target or added AVX tests as well to the existing SSE2 tests.
llvm-svn: 351685
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp 41
1 files changed, 41 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 1d94eed2372..ec6dc72728a 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1653,6 +1653,9 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
static const CostTblEntry AVX512BWCostTbl[] = {
{ ISD::SETCC, MVT::v32i16, 1 },
{ ISD::SETCC, MVT::v64i8, 1 },
+
+ { ISD::SELECT, MVT::v32i16, 1 },
+ { ISD::SELECT, MVT::v64i8, 1 },
};
static const CostTblEntry AVX512CostTbl[] = {
@@ -1660,6 +1663,11 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
{ ISD::SETCC, MVT::v16i32, 1 },
{ ISD::SETCC, MVT::v8f64, 1 },
{ ISD::SETCC, MVT::v16f32, 1 },
+
+ { ISD::SELECT, MVT::v8i64, 1 },
+ { ISD::SELECT, MVT::v16i32, 1 },
+ { ISD::SELECT, MVT::v8f64, 1 },
+ { ISD::SELECT, MVT::v16f32, 1 },
};
static const CostTblEntry AVX2CostTbl[] = {
@@ -1667,6 +1675,11 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
{ ISD::SETCC, MVT::v8i32, 1 },
{ ISD::SETCC, MVT::v16i16, 1 },
{ ISD::SETCC, MVT::v32i8, 1 },
+
+ { ISD::SELECT, MVT::v4i64, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v8i32, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v16i16, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v32i8, 1 }, // pblendvb
};
static const CostTblEntry AVX1CostTbl[] = {
@@ -1677,6 +1690,13 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
{ ISD::SETCC, MVT::v8i32, 4 },
{ ISD::SETCC, MVT::v16i16, 4 },
{ ISD::SETCC, MVT::v32i8, 4 },
+
+ { ISD::SELECT, MVT::v4f64, 1 }, // vblendvpd
+ { ISD::SELECT, MVT::v8f32, 1 }, // vblendvps
+ { ISD::SELECT, MVT::v4i64, 1 }, // vblendvpd
+ { ISD::SELECT, MVT::v8i32, 1 }, // vblendvps
+ { ISD::SELECT, MVT::v16i16, 3 }, // vandps + vandnps + vorps
+ { ISD::SELECT, MVT::v32i8, 3 }, // vandps + vandnps + vorps
};
static const CostTblEntry SSE42CostTbl[] = {
@@ -1685,6 +1705,15 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
{ ISD::SETCC, MVT::v2i64, 1 },
};
+ static const CostTblEntry SSE41CostTbl[] = {
+ { ISD::SELECT, MVT::v2f64, 1 }, // blendvpd
+ { ISD::SELECT, MVT::v4f32, 1 }, // blendvps
+ { ISD::SELECT, MVT::v2i64, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v4i32, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v8i16, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v16i8, 1 }, // pblendvb
+ };
+
static const CostTblEntry SSE2CostTbl[] = {
{ ISD::SETCC, MVT::v2f64, 2 },
{ ISD::SETCC, MVT::f64, 1 },
@@ -1692,11 +1721,19 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
{ ISD::SETCC, MVT::v4i32, 1 },
{ ISD::SETCC, MVT::v8i16, 1 },
{ ISD::SETCC, MVT::v16i8, 1 },
+
+ { ISD::SELECT, MVT::v2f64, 3 }, // andpd + andnpd + orpd
+ { ISD::SELECT, MVT::v2i64, 3 }, // pand + pandn + por
+ { ISD::SELECT, MVT::v4i32, 3 }, // pand + pandn + por
+ { ISD::SELECT, MVT::v8i16, 3 }, // pand + pandn + por
+ { ISD::SELECT, MVT::v16i8, 3 }, // pand + pandn + por
};
static const CostTblEntry SSE1CostTbl[] = {
{ ISD::SETCC, MVT::v4f32, 2 },
{ ISD::SETCC, MVT::f32, 1 },
+
+ { ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps
};
if (ST->hasBWI())
@@ -1719,6 +1756,10 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
OpenPOWER on IntegriCloud