summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-01-07 22:27:43 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-01-07 22:27:43 +0000
commit9681c407b4f4a02308803a3c0119e27d44c82fd6 (patch)
tree0ff5f05ab9ac94ce90163821c11835e9b0112922
parenta74e3088dfdc3bd0242dcfb3712a3eeea67ad150 (diff)
downloadbcm5719-llvm-9681c407b4f4a02308803a3c0119e27d44c82fd6.tar.gz
bcm5719-llvm-9681c407b4f4a02308803a3c0119e27d44c82fd6.zip
[CostModel][X86] Update SSE41/AVX1 vXi32 SHL costs
SSE41 provides pmulld which allows the simpler pslld/paddd/cvttps2dq/pmulld pattern than SSE2's use of pmuludq. llvm-svn: 291372
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp2
-rw-r--r--llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll24
2 files changed, 14 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 30b20555257..489aab349d4 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -489,6 +489,8 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence.
{ ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence.
{ ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence.
+ { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld
+ { ISD::SHL, MVT::v8i32, 2*4 }, // pslld/paddd/cvttps2dq/pmulld
{ ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence.
{ ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence.
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 7090ae4a35e..98982225be8 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -57,8 +57,8 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 10 for instruction: %shift
-; SSE41: Found an estimated cost of 10 for instruction: %shift
-; AVX: Found an estimated cost of 10 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
@@ -70,8 +70,8 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 20 for instruction: %shift
-; SSE41: Found an estimated cost of 20 for instruction: %shift
-; AVX: Found an estimated cost of 20 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
@@ -83,8 +83,8 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32':
; SSE2: Found an estimated cost of 40 for instruction: %shift
-; SSE41: Found an estimated cost of 40 for instruction: %shift
-; AVX: Found an estimated cost of 40 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
; AVX2: Found an estimated cost of 2 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
@@ -216,8 +216,8 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 10 for instruction: %shift
-; SSE41: Found an estimated cost of 10 for instruction: %shift
-; AVX: Found an estimated cost of 10 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
@@ -230,8 +230,8 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 20 for instruction: %shift
-; SSE41: Found an estimated cost of 20 for instruction: %shift
-; AVX: Found an estimated cost of 20 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
@@ -244,8 +244,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32':
; SSE2: Found an estimated cost of 40 for instruction: %shift
-; SSE41: Found an estimated cost of 40 for instruction: %shift
-; AVX: Found an estimated cost of 40 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
; AVX2: Found an estimated cost of 2 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
OpenPOWER on IntegriCloud