diff options
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 22 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/testshiftlshr.ll | 4 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/testshiftshl.ll | 4 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll | 18 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll | 28 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll | 30 |
6 files changed, 61 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index f5ebfa00733..107ed935937 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -144,6 +144,10 @@ int X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry AVX512BWUniformConstCostTable[] = { + { ISD::SHL, MVT::v64i8, 2 }, // psllw + pand. + { ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand. + { ISD::SRA, MVT::v64i8, 4 }, // psrlw, pand, pxor, psubb. + { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence }; @@ -168,6 +172,10 @@ int X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry AVX2UniformConstCostTable[] = { + { ISD::SHL, MVT::v32i8, 2 }, // psllw + pand. + { ISD::SRL, MVT::v32i8, 2 }, // psrlw + pand. + { ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb. + { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence @@ -184,6 +192,14 @@ int X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry SSE2UniformConstCostTable[] = { + { ISD::SHL, MVT::v16i8, 2 }, // psllw + pand. + { ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. + + { ISD::SHL, MVT::v32i8, 4 }, // 2*(psllw + pand). + { ISD::SRL, MVT::v32i8, 4 }, // 2*(psrlw + pand). + { ISD::SRA, MVT::v32i8, 8 }, // 2*(psrlw, pand, pxor, psubb). + { ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence { ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence @@ -364,20 +380,14 @@ int X86TTIImpl::getArithmeticInstrCost( static const CostTblEntry SSE2UniformShiftCostTable[] = { // Uniform splats are cheaper for the following instructions. - { ISD::SHL, MVT::v16i8, 1 }, // psllw. - { ISD::SHL, MVT::v32i8, 2 }, // psllw. { ISD::SHL, MVT::v16i16, 2 }, // psllw. { ISD::SHL, MVT::v8i32, 2 }, // pslld { ISD::SHL, MVT::v4i64, 2 }, // psllq. - { ISD::SRL, MVT::v16i8, 1 }, // psrlw. - { ISD::SRL, MVT::v32i8, 2 }, // psrlw. { ISD::SRL, MVT::v16i16, 2 }, // psrlw. { ISD::SRL, MVT::v8i32, 2 }, // psrld. { ISD::SRL, MVT::v4i64, 2 }, // psrlq. - { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. - { ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb. { ISD::SRA, MVT::v16i16, 2 }, // psraw. { ISD::SRA, MVT::v8i32, 2 }, // psrad. { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. diff --git a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll index 52f176fe4d6..e5fff9b5e4d 100644 --- a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll +++ b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll @@ -498,7 +498,7 @@ entry: define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) { entry: ; SSE2: shift16i8c - ; SSE2: cost of 1 {{.*}} lshr + ; SSE2: cost of 2 {{.*}} lshr ; SSE2-CODEGEN: shift16i8c ; SSE2-CODEGEN: psrlw $3 @@ -513,7 +513,7 @@ entry: define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) { entry: ; SSE2: shift32i8c - ; SSE2: cost of 2 {{.*}} lshr + ; SSE2: cost of 4 {{.*}} lshr ; SSE2-CODEGEN: shift32i8c ; SSE2-CODEGEN: psrlw $3 diff --git a/llvm/test/Analysis/CostModel/X86/testshiftshl.ll b/llvm/test/Analysis/CostModel/X86/testshiftshl.ll index e385c5bfeea..6628b9b8798 100644 --- a/llvm/test/Analysis/CostModel/X86/testshiftshl.ll +++ b/llvm/test/Analysis/CostModel/X86/testshiftshl.ll @@ -498,7 +498,7 @@ entry: define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) { entry: ; SSE2: shift16i8c - ; SSE2: cost of 1 {{.*}} shl + ; SSE2: cost of 2 {{.*}} shl ; SSE2-CODEGEN: shift16i8c ; SSE2-CODEGEN: psllw $3 @@ -513,7 +513,7 @@ entry: define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) { entry: ; SSE2: shift32i8c - ; SSE2: cost of 2 {{.*}} shl + ; SSE2: cost of 4 {{.*}} shl ; SSE2-CODEGEN: shift32i8c ; SSE2-CODEGEN: psllw $3 diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index 3b87e6a9d8b..6756f3ba280 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -606,7 +606,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 4 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <16 x i8> %shift } @@ -616,9 +616,10 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift ; AVX: Found an estimated cost of 8 for instruction: %shift -; AVX2: Found an estimated cost of 8 for instruction: %shift -; AVX512: Found an estimated cost of 8 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <32 x i8> %shift } @@ -628,10 +629,11 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift ; AVX: Found an estimated cost of 16 for instruction: %shift -; AVX2: Found an estimated cost of 16 for instruction: %shift -; AVX512F: Found an estimated cost of 16 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; AVX2: Found an estimated cost of 8 for instruction: %shift +; AVX512F: Found an estimated cost of 8 for instruction: %shift +; AVX512BW: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 16 for instruction: %shift +; XOPAVX2: Found an estimated cost of 8 for instruction: %shift %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <64 x i8> %shift } diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index 2717fcf571b..63e6db194d5 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -611,11 +611,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8': -; SSE2: Found an estimated cost of 1 for instruction: %shift -; SSE41: Found an estimated cost of 1 for instruction: %shift -; AVX: Found an estimated cost of 1 for instruction: %shift -; AVX2: Found an estimated cost of 1 for instruction: %shift -; AVX512: Found an estimated cost of 1 for instruction: %shift +; SSE2: Found an estimated cost of 2 for instruction: %shift +; SSE41: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 2 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <16 x i8> %shift @@ -623,25 +623,27 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8': -; SSE2: Found an estimated cost of 2 for instruction: %shift -; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8': -; SSE2: Found an estimated cost of 4 for instruction: %shift -; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; SSE2: Found an estimated cost of 8 for instruction: %shift +; SSE41: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512F: Found an estimated cost of 4 for instruction: %shift ; AVX512BW: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <64 x i8> %shift } diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll index 98982225be8..8c42bd66c70 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -616,37 +616,39 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8': -; SSE2: Found an estimated cost of 1 for instruction: %shift -; SSE41: Found an estimated cost of 1 for instruction: %shift -; AVX: Found an estimated cost of 1 for instruction: %shift -; AVX2: Found an estimated cost of 1 for instruction: %shift -; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 1 for instruction: %shift +; SSE2: Found an estimated cost of 2 for instruction: %shift +; SSE41: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8': -; SSE2: Found an estimated cost of 2 for instruction: %shift -; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8': -; SSE2: Found an estimated cost of 4 for instruction: %shift -; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; SSE2: Found an estimated cost of 8 for instruction: %shift +; SSE41: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512F: Found an estimated cost of 4 for instruction: %shift ; AVX512BW: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <64 x i8> %shift } |