diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-08 14:14:36 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-08 14:14:36 +0000 |
commit | 9c58950eeb0ac0ffe86b0ebdd43b2042d4fb9de6 (patch) | |
tree | 412bdbf49e6ae2cbe5205b0168d39f6399891dd9 | |
parent | 1fa5487c0529c80a66565ba99111644532c49aff (diff) | |
download | bcm5719-llvm-9c58950eeb0ac0ffe86b0ebdd43b2042d4fb9de6.tar.gz bcm5719-llvm-9c58950eeb0ac0ffe86b0ebdd43b2042d4fb9de6.zip |
[CostModel][X86] Fixed vXi8 uniform shift costs.
The 'fast' costs should only apply to shifts by uniform constants (uniform non-constant shifts are lowered using the slow default implementation).
Logical shifts were not taking into account that we must mask the psllw/psrlw result, so their costs needed to be doubled.
Added missing AVX2/AVX512BW costs as well.
llvm-svn: 291391
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 22 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/testshiftlshr.ll | 4 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/testshiftshl.ll | 4 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll | 18 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll | 28 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll | 30 |
6 files changed, 61 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index f5ebfa00733..107ed935937 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -144,6 +144,10 @@ int X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry AVX512BWUniformConstCostTable[] = { + { ISD::SHL, MVT::v64i8, 2 }, // psllw + pand. + { ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand. + { ISD::SRA, MVT::v64i8, 4 }, // psrlw, pand, pxor, psubb. + { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence }; @@ -168,6 +172,10 @@ int X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry AVX2UniformConstCostTable[] = { + { ISD::SHL, MVT::v32i8, 2 }, // psllw + pand. + { ISD::SRL, MVT::v32i8, 2 }, // psrlw + pand. + { ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb. + { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence @@ -184,6 +192,14 @@ int X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry SSE2UniformConstCostTable[] = { + { ISD::SHL, MVT::v16i8, 2 }, // psllw + pand. + { ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. + + { ISD::SHL, MVT::v32i8, 4 }, // 2*(psllw + pand). + { ISD::SRL, MVT::v32i8, 4 }, // 2*(psrlw + pand). + { ISD::SRA, MVT::v32i8, 8 }, // 2*(psrlw, pand, pxor, psubb). + { ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence { ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence @@ -364,20 +380,14 @@ int X86TTIImpl::getArithmeticInstrCost( static const CostTblEntry SSE2UniformShiftCostTable[] = { // Uniform splats are cheaper for the following instructions. - { ISD::SHL, MVT::v16i8, 1 }, // psllw. - { ISD::SHL, MVT::v32i8, 2 }, // psllw. { ISD::SHL, MVT::v16i16, 2 }, // psllw. 
{ ISD::SHL, MVT::v8i32, 2 }, // pslld { ISD::SHL, MVT::v4i64, 2 }, // psllq. - { ISD::SRL, MVT::v16i8, 1 }, // psrlw. - { ISD::SRL, MVT::v32i8, 2 }, // psrlw. { ISD::SRL, MVT::v16i16, 2 }, // psrlw. { ISD::SRL, MVT::v8i32, 2 }, // psrld. { ISD::SRL, MVT::v4i64, 2 }, // psrlq. - { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. - { ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb. { ISD::SRA, MVT::v16i16, 2 }, // psraw. { ISD::SRA, MVT::v8i32, 2 }, // psrad. { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. diff --git a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll index 52f176fe4d6..e5fff9b5e4d 100644 --- a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll +++ b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll @@ -498,7 +498,7 @@ entry: define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) { entry: ; SSE2: shift16i8c - ; SSE2: cost of 1 {{.*}} lshr + ; SSE2: cost of 2 {{.*}} lshr ; SSE2-CODEGEN: shift16i8c ; SSE2-CODEGEN: psrlw $3 @@ -513,7 +513,7 @@ entry: define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) { entry: ; SSE2: shift32i8c - ; SSE2: cost of 2 {{.*}} lshr + ; SSE2: cost of 4 {{.*}} lshr ; SSE2-CODEGEN: shift32i8c ; SSE2-CODEGEN: psrlw $3 diff --git a/llvm/test/Analysis/CostModel/X86/testshiftshl.ll b/llvm/test/Analysis/CostModel/X86/testshiftshl.ll index e385c5bfeea..6628b9b8798 100644 --- a/llvm/test/Analysis/CostModel/X86/testshiftshl.ll +++ b/llvm/test/Analysis/CostModel/X86/testshiftshl.ll @@ -498,7 +498,7 @@ entry: define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) { entry: ; SSE2: shift16i8c - ; SSE2: cost of 1 {{.*}} shl + ; SSE2: cost of 2 {{.*}} shl ; SSE2-CODEGEN: shift16i8c ; SSE2-CODEGEN: psllw $3 @@ -513,7 +513,7 @@ entry: define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) { entry: ; SSE2: shift32i8c - ; SSE2: cost of 2 {{.*}} shl + ; SSE2: cost of 4 {{.*}} shl ; SSE2-CODEGEN: 
shift32i8c ; SSE2-CODEGEN: psllw $3 diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index 3b87e6a9d8b..6756f3ba280 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -606,7 +606,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 4 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <16 x i8> %shift } @@ -616,9 +616,10 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift ; AVX: Found an estimated cost of 8 for instruction: %shift -; AVX2: Found an estimated cost of 8 for instruction: %shift -; AVX512: Found an estimated cost of 8 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <32 x i8> %shift } @@ -628,10 +629,11 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: 
%shift ; AVX: Found an estimated cost of 16 for instruction: %shift -; AVX2: Found an estimated cost of 16 for instruction: %shift -; AVX512F: Found an estimated cost of 16 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; AVX2: Found an estimated cost of 8 for instruction: %shift +; AVX512F: Found an estimated cost of 8 for instruction: %shift +; AVX512BW: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 16 for instruction: %shift +; XOPAVX2: Found an estimated cost of 8 for instruction: %shift %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <64 x i8> %shift } diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index 2717fcf571b..63e6db194d5 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -611,11 +611,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8': -; SSE2: Found an estimated cost of 1 for instruction: %shift -; SSE41: Found an estimated cost of 1 for instruction: %shift -; AVX: Found an estimated cost of 1 for instruction: %shift -; AVX2: Found an estimated cost of 1 for instruction: %shift -; AVX512: Found an estimated cost of 1 for instruction: %shift +; SSE2: Found an estimated cost of 2 for instruction: %shift +; SSE41: Found an estimated cost of 2 for 
instruction: %shift +; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 2 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <16 x i8> %shift @@ -623,25 +623,27 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8': -; SSE2: Found an estimated cost of 2 for instruction: %shift -; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8': -; SSE2: Found an estimated cost of 4 for instruction: %shift -; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; SSE2: Found an estimated cost of 8 for instruction: %shift +; SSE41: Found an estimated cost of 8 for instruction: 
%shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512F: Found an estimated cost of 4 for instruction: %shift ; AVX512BW: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <64 x i8> %shift } diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll index 98982225be8..8c42bd66c70 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -616,37 +616,39 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8': -; SSE2: Found an estimated cost of 1 for instruction: %shift -; SSE41: Found an estimated cost of 1 for instruction: %shift -; AVX: Found an estimated cost of 1 for instruction: %shift -; AVX2: Found an estimated cost of 1 for instruction: %shift -; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 1 for instruction: %shift +; SSE2: Found an estimated cost of 2 for instruction: %shift +; SSE41: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; 
AVX512: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8': -; SSE2: Found an estimated cost of 2 for instruction: %shift -; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8': -; SSE2: Found an estimated cost of 4 for instruction: %shift -; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; SSE2: Found an estimated cost of 8 for instruction: %shift +; SSE41: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512F: Found an estimated cost of 4 for instruction: %shift ; AVX512BW: Found an estimated cost of 
2 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <64 x i8> %shift } |