summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-01-07 21:33:00 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-01-07 21:33:00 +0000
commit: e70644dab7dc0208cf00ba3605199e0bdc278c19 (patch)
tree: e88b3b32a5ec3e23aa48d59de63719a2adc92241 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent: 935beac173c38e48c96c3d9d59dc80f046fa1d87 (diff)
download: bcm5719-llvm-e70644dab7dc0208cf00ba3605199e0bdc278c19.tar.gz
download: bcm5719-llvm-e70644dab7dc0208cf00ba3605199e0bdc278c19.zip
[CostModel][X86] Generalized cost calculation of SHL by constant -> MUL conversion.
llvm-svn: 291364
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp 31
1 files changed, 10 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index e2401d4fe86..29cd8edc46f 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -409,21 +409,9 @@ int X86TTIImpl::getArithmeticInstrCost(
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
MVT VT = LT.second;
// Vector shift left by non uniform constant can be lowered
- // into vector multiply (pmullw/pmulld).
- if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
- (VT == MVT::v4i32 && ST->hasSSE41()))
- return LT.first;
-
- // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
- // sequence of extract + two vector multiply + insert.
- if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
- (ST->hasAVX() && !ST->hasAVX2()))
- ISD = ISD::MUL;
-
- // A vector shift left by non uniform constant is converted
- // into a vector multiply; the new multiply is eventually
- // lowered into a sequence of shuffles and 2 x pmuludq.
- if (VT == MVT::v4i32 && ST->hasSSE2())
+ // into vector multiply.
+ if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) ||
+ ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX()))
ISD = ISD::MUL;
}
@@ -534,6 +522,7 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
{ ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v8i16, 1 }, // pmullw
{ ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
{ ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
@@ -549,13 +538,13 @@ int X86TTIImpl::getArithmeticInstrCost(
// generally a bad idea. Assume somewhat arbitrarily that we have to be able
// to hide "20 cycles" for each lane.
{ ISD::SDIV, MVT::v16i8, 16*20 },
- { ISD::SDIV, MVT::v8i16, 8*20 },
- { ISD::SDIV, MVT::v4i32, 4*20 },
- { ISD::SDIV, MVT::v2i64, 2*20 },
+ { ISD::SDIV, MVT::v8i16, 8*20 },
+ { ISD::SDIV, MVT::v4i32, 4*20 },
+ { ISD::SDIV, MVT::v2i64, 2*20 },
{ ISD::UDIV, MVT::v16i8, 16*20 },
- { ISD::UDIV, MVT::v8i16, 8*20 },
- { ISD::UDIV, MVT::v4i32, 4*20 },
- { ISD::UDIV, MVT::v2i64, 2*20 },
+ { ISD::UDIV, MVT::v8i16, 8*20 },
+ { ISD::UDIV, MVT::v4i32, 4*20 },
+ { ISD::UDIV, MVT::v2i64, 2*20 },
};
if (ST->hasSSE2())
OpenPOWER on IntegriCloud