diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-09-30 08:17:50 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-09-30 08:17:50 +0000 |
commit | 3d11c994f7d85474b80409efb6e0b4916910252d (patch) | |
tree | 3ab63bc2d19c13e267bc224758bd7b193a9a8977 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp | |
parent | 82d705e6d99812bd66db531414153117f50728a4 (diff) | |
download | bcm5719-llvm-3d11c994f7d85474b80409efb6e0b4916910252d.tar.gz bcm5719-llvm-3d11c994f7d85474b80409efb6e0b4916910252d.zip |
[X86][XOP] Added support for the lowering of 128-bit vector shifts to XOP shift instructions
The XOP shifts only come in logical and arithmetic versions; the shift direction (left/right) is controlled by whether the per-element shift amount is positive or negative. Because of this I've added new X86ISD nodes instead of trying to force them to use the existing shift nodes.
Additionally, Excavator cores (bdver4) support both XOP and AVX2, meaning that they should use the AVX2 shifts when they can and fall back to XOP in other cases.
Differential Revision: http://reviews.llvm.org/D8690
llvm-svn: 248878
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 76 |
1 files changed, 61 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0cf3163c89d..f23057083f9 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -140,6 +140,12 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v8i64, 1 }, }; + if (ST->hasAVX512()) { + int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second); + if (Idx != -1) + return LT.first * AVX512CostTable[Idx].Cost; + } + static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = { // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to // customize them to detect the cases where shift amount is a scalar one. @@ -153,7 +159,59 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::SRL, MVT::v2i64, 1 }, { ISD::SHL, MVT::v4i64, 1 }, { ISD::SRL, MVT::v4i64, 1 }, + }; + + // Look for AVX2 lowering tricks. + if (ST->hasAVX2()) { + if (ISD == ISD::SHL && LT.second == MVT::v16i16 && + (Op2Info == TargetTransformInfo::OK_UniformConstantValue || + Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) + // On AVX2, a packed v16i16 shift left by a constant build_vector + // is lowered into a vector multiply (vpmullw). + return LT.first; + int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second); + if (Idx != -1) + return LT.first * AVX2CostTable[Idx].Cost; + } + + static const CostTblEntry<MVT::SimpleValueType> XOPCostTable[] = { + // 128bit shifts take 1cy, but right shifts require negation beforehand. + { ISD::SHL, MVT::v16i8, 1 }, + { ISD::SRL, MVT::v16i8, 2 }, + { ISD::SRA, MVT::v16i8, 2 }, + { ISD::SHL, MVT::v8i16, 1 }, + { ISD::SRL, MVT::v8i16, 2 }, + { ISD::SRA, MVT::v8i16, 2 }, + { ISD::SHL, MVT::v4i32, 1 }, + { ISD::SRL, MVT::v4i32, 2 }, + { ISD::SRA, MVT::v4i32, 2 }, + { ISD::SHL, MVT::v2i64, 1 }, + { ISD::SRL, MVT::v2i64, 2 }, + { ISD::SRA, MVT::v2i64, 2 }, + // 256bit shifts require splitting if AVX2 didn't catch them above. 
+ { ISD::SHL, MVT::v32i8, 2 }, + { ISD::SRL, MVT::v32i8, 4 }, + { ISD::SRA, MVT::v32i8, 4 }, + { ISD::SHL, MVT::v16i16, 2 }, + { ISD::SRL, MVT::v16i16, 4 }, + { ISD::SRA, MVT::v16i16, 4 }, + { ISD::SHL, MVT::v8i32, 2 }, + { ISD::SRL, MVT::v8i32, 4 }, + { ISD::SRA, MVT::v8i32, 4 }, + { ISD::SHL, MVT::v4i64, 2 }, + { ISD::SRL, MVT::v4i64, 4 }, + { ISD::SRA, MVT::v4i64, 4 }, + }; + + // Look for XOP lowering tricks. + if (ST->hasXOP()) { + int Idx = CostTableLookup(XOPCostTable, ISD, LT.second); + if (Idx != -1) + return LT.first * XOPCostTable[Idx].Cost; + } + + static const CostTblEntry<MVT::SimpleValueType> AVX2CustomCostTable[] = { { ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence. { ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. @@ -176,23 +234,11 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::UDIV, MVT::v4i64, 4*20 }, }; - if (ST->hasAVX512()) { - int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second); - if (Idx != -1) - return LT.first * AVX512CostTable[Idx].Cost; - } - // Look for AVX2 lowering tricks. + // Look for AVX2 lowering tricks for custom cases. if (ST->hasAVX2()) { - if (ISD == ISD::SHL && LT.second == MVT::v16i16 && - (Op2Info == TargetTransformInfo::OK_UniformConstantValue || - Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) - // On AVX2, a packed v16i16 shift left by a constant build_vector - // is lowered into a vector multiply (vpmullw). - return LT.first; - - int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second); + int Idx = CostTableLookup(AVX2CustomCostTable, ISD, LT.second); if (Idx != -1) - return LT.first * AVX2CostTable[Idx].Cost; + return LT.first * AVX2CustomCostTable[Idx].Cost; } static const CostTblEntry<MVT::SimpleValueType> |