diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 63 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll | 5 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll | 8 |
3 files changed, 44 insertions, 32 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 489aab349d4..f5ebfa00733 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -207,6 +207,43 @@ int X86TTIImpl::getArithmeticInstrCost( return LT.first * Entry->Cost; } + static const CostTblEntry AVX2UniformCostTable[] = { + // Uniform splats are cheaper for the following instructions. + { ISD::SHL, MVT::v16i16, 1 }, // psllw. + { ISD::SRL, MVT::v16i16, 1 }, // psrlw. + { ISD::SRA, MVT::v16i16, 1 }, // psraw. + }; + + if (ST->hasAVX2() && + ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || + (Op2Info == TargetTransformInfo::OK_UniformValue))) { + if (const auto *Entry = + CostTableLookup(AVX2UniformCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + + static const CostTblEntry SSE2UniformCostTable[] = { + // Uniform splats are cheaper for the following instructions. + { ISD::SHL, MVT::v8i16, 1 }, // psllw. + { ISD::SHL, MVT::v4i32, 1 }, // pslld + { ISD::SHL, MVT::v2i64, 1 }, // psllq. + + { ISD::SRL, MVT::v8i16, 1 }, // psrlw. + { ISD::SRL, MVT::v4i32, 1 }, // psrld. + { ISD::SRL, MVT::v2i64, 1 }, // psrlq. + + { ISD::SRA, MVT::v8i16, 1 }, // psraw. + { ISD::SRA, MVT::v4i32, 1 }, // psrad. + }; + + if (ST->hasSSE2() && + ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || + (Op2Info == TargetTransformInfo::OK_UniformValue))) { + if (const auto *Entry = + CostTableLookup(SSE2UniformCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + static const CostTblEntry AVX512DQCostTable[] = { { ISD::MUL, MVT::v2i64, 1 }, { ISD::MUL, MVT::v4i64, 1 }, @@ -291,20 +328,6 @@ int X86TTIImpl::getArithmeticInstrCost( return LT.first * Entry->Cost; } - static const CostTblEntry AVX2UniformCostTable[] = { - // Uniform splats are cheaper for the following instructions. - { ISD::SRL, MVT::v16i16, 1 }, // psrlw. - { ISD::SRA, MVT::v16i16, 1 }, // psraw. - }; - - if (ST->hasAVX2() && - ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || - (Op2Info == TargetTransformInfo::OK_UniformValue))) { - if (const auto *Entry = - CostTableLookup(AVX2UniformCostTable, ISD, LT.second)) - return LT.first * Entry->Cost; - } - static const CostTblEntry XOPShiftCostTable[] = { // 128bit shifts take 1cy, but right shifts require negation beforehand. { ISD::SHL, MVT::v16i8, 1 }, @@ -339,31 +362,23 @@ int X86TTIImpl::getArithmeticInstrCost( if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry SSE2UniformCostTable[] = { + static const CostTblEntry SSE2UniformShiftCostTable[] = { // Uniform splats are cheaper for the following instructions. { ISD::SHL, MVT::v16i8, 1 }, // psllw. { ISD::SHL, MVT::v32i8, 2 }, // psllw. - { ISD::SHL, MVT::v8i16, 1 }, // psllw. { ISD::SHL, MVT::v16i16, 2 }, // psllw. - { ISD::SHL, MVT::v4i32, 1 }, // pslld { ISD::SHL, MVT::v8i32, 2 }, // pslld - { ISD::SHL, MVT::v2i64, 1 }, // psllq. { ISD::SHL, MVT::v4i64, 2 }, // psllq. { ISD::SRL, MVT::v16i8, 1 }, // psrlw. { ISD::SRL, MVT::v32i8, 2 }, // psrlw. - { ISD::SRL, MVT::v8i16, 1 }, // psrlw. { ISD::SRL, MVT::v16i16, 2 }, // psrlw. - { ISD::SRL, MVT::v4i32, 1 }, // psrld. { ISD::SRL, MVT::v8i32, 2 }, // psrld. - { ISD::SRL, MVT::v2i64, 1 }, // psrlq. { ISD::SRL, MVT::v4i64, 2 }, // psrlq. { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. { ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb. - { ISD::SRA, MVT::v8i16, 1 }, // psraw. { ISD::SRA, MVT::v16i16, 2 }, // psraw. - { ISD::SRA, MVT::v4i32, 1 }, // psrad. { ISD::SRA, MVT::v8i32, 2 }, // psrad. { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. { ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle. @@ -373,7 +388,7 @@ int X86TTIImpl::getArithmeticInstrCost( ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || (Op2Info == TargetTransformInfo::OK_UniformValue))) { if (const auto *Entry = - CostTableLookup(SSE2UniformCostTable, ISD, LT.second)) + CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second)) return LT.first * Entry->Cost; } diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index e75b5dc3ddc..3b87e6a9d8b 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -529,8 +529,7 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift -; XOPAVX2: Found an estimated cost of 1 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5> ret <4 x i32> %shift } @@ -568,7 +567,7 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> ret <8 x i16> %shift } diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index c1c674875c9..2717fcf571b 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -501,8 +501,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift -; XOPAVX2: Found an estimated cost of 1 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = lshr <2 x i64> %a, <i64 7, i64 7> ret <2 x i64> %shift } @@ -540,8 +539,7 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift -; XOPAVX2: Found an estimated cost of 1 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5> ret <4 x i32> %shift } @@ -579,7 +577,7 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> ret <8 x i16> %shift } |