summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 6
-rw-r--r-- llvm/test/Analysis/CostModel/X86/testshiftlshr.ll | 16
-rw-r--r-- llvm/test/Analysis/CostModel/X86/testshiftshl.ll | 16
-rw-r--r-- llvm/test/Analysis/CostModel/X86/vshift-cost.ll | 18
4 files changed, 28 insertions, 28 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7df72609184..7cda54d6b00 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -259,13 +259,13 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
- { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
- { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
+ { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence.
+ { ISD::SHL, MVT::v4i64, 8 }, // splat+shuffle sequence.
{ ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
+ { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence.
{ ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
{ ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
diff --git a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll
index 5775a42d08a..52f176fe4d6 100644
--- a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -5,7 +5,7 @@
define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
entry:
; SSE2: shift2i16
- ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i16
; SSE2-CODEGEN: psrlq
@@ -65,7 +65,7 @@ entry:
define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
entry:
; SSE2: shift2i32
- ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i32
; SSE2-CODEGEN: psrlq
@@ -125,7 +125,7 @@ entry:
define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
entry:
; SSE2: shift2i64
- ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i64
; SSE2-CODEGEN: psrlq
@@ -137,7 +137,7 @@ entry:
define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
entry:
; SSE2: shift4i64
- ; SSE2: cost of 40 {{.*}} lshr
+ ; SSE2: cost of 8 {{.*}} lshr
; SSE2-CODEGEN: shift4i64
; SSE2-CODEGEN: psrlq
@@ -149,7 +149,7 @@ entry:
define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
entry:
; SSE2: shift8i64
- ; SSE2: cost of 80 {{.*}} lshr
+ ; SSE2: cost of 16 {{.*}} lshr
; SSE2-CODEGEN: shift8i64
; SSE2-CODEGEN: psrlq
@@ -161,7 +161,7 @@ entry:
define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
entry:
; SSE2: shift16i64
- ; SSE2: cost of 160 {{.*}} lshr
+ ; SSE2: cost of 32 {{.*}} lshr
; SSE2-CODEGEN: shift16i64
; SSE2-CODEGEN: psrlq
@@ -173,7 +173,7 @@ entry:
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
entry:
; SSE2: shift32i64
- ; SSE2: cost of 320 {{.*}} lshr
+ ; SSE2: cost of 64 {{.*}} lshr
; SSE2-CODEGEN: shift32i64
; SSE2-CODEGEN: psrlq
@@ -185,7 +185,7 @@ entry:
define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
entry:
; SSE2: shift2i8
- ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i8
; SSE2-CODEGEN: psrlq
diff --git a/llvm/test/Analysis/CostModel/X86/testshiftshl.ll b/llvm/test/Analysis/CostModel/X86/testshiftshl.ll
index d4e33818932..e385c5bfeea 100644
--- a/llvm/test/Analysis/CostModel/X86/testshiftshl.ll
+++ b/llvm/test/Analysis/CostModel/X86/testshiftshl.ll
@@ -5,7 +5,7 @@
define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
entry:
; SSE2: shift2i16
- ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i16
; SSE2-CODEGEN: psllq
@@ -65,7 +65,7 @@ entry:
define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
entry:
; SSE2: shift2i32
- ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i32
; SSE2-CODEGEN: psllq
@@ -125,7 +125,7 @@ entry:
define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
entry:
; SSE2: shift2i64
- ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i64
; SSE2-CODEGEN: psllq
@@ -137,7 +137,7 @@ entry:
define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
entry:
; SSE2: shift4i64
- ; SSE2: cost of 40 {{.*}} shl
+ ; SSE2: cost of 8 {{.*}} shl
; SSE2-CODEGEN: shift4i64
; SSE2-CODEGEN: psllq
@@ -149,7 +149,7 @@ entry:
define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
entry:
; SSE2: shift8i64
- ; SSE2: cost of 80 {{.*}} shl
+ ; SSE2: cost of 16 {{.*}} shl
; SSE2-CODEGEN: shift8i64
; SSE2-CODEGEN: psllq
@@ -161,7 +161,7 @@ entry:
define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
entry:
; SSE2: shift16i64
- ; SSE2: cost of 160 {{.*}} shl
+ ; SSE2: cost of 32 {{.*}} shl
; SSE2-CODEGEN: shift16i64
; SSE2-CODEGEN: psllq
@@ -173,7 +173,7 @@ entry:
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
entry:
; SSE2: shift32i64
- ; SSE2: cost of 320 {{.*}} shl
+ ; SSE2: cost of 64 {{.*}} shl
; SSE2-CODEGEN: shift32i64
; SSE2-CODEGEN: psllq
@@ -185,7 +185,7 @@ entry:
define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
entry:
; SSE2: shift2i8
- ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i8
; SSE2-CODEGEN: psllq
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-cost.ll
index 84d72463ac0..dd93badc893 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-cost.ll
@@ -62,9 +62,9 @@ define <2 x i64> @test5(<2 x i64> %a) {
ret <2 x i64> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test5':
-; SSE2: Found an estimated cost of 20 for instruction: %shl
-; SSE41: Found an estimated cost of 20 for instruction: %shl
-; AVX: Found an estimated cost of 20 for instruction: %shl
+; SSE2: Found an estimated cost of 4 for instruction: %shl
+; SSE41: Found an estimated cost of 4 for instruction: %shl
+; AVX: Found an estimated cost of 4 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
@@ -117,9 +117,9 @@ define <4 x i64> @test8(<4 x i64> %a) {
ret <4 x i64> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test8':
-; SSE2: Found an estimated cost of 40 for instruction: %shl
-; SSE41: Found an estimated cost of 40 for instruction: %shl
-; AVX: Found an estimated cost of 40 for instruction: %shl
+; SSE2: Found an estimated cost of 8 for instruction: %shl
+; SSE41: Found an estimated cost of 8 for instruction: %shl
+; AVX: Found an estimated cost of 8 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
@@ -159,9 +159,9 @@ define <8 x i64> @test11(<8 x i64> %a) {
ret <8 x i64> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test11':
-; SSE2: Found an estimated cost of 80 for instruction: %shl
-; SSE41: Found an estimated cost of 80 for instruction: %shl
-; AVX: Found an estimated cost of 80 for instruction: %shl
+; SSE2: Found an estimated cost of 16 for instruction: %shl
+; SSE41: Found an estimated cost of 16 for instruction: %shl
+; AVX: Found an estimated cost of 16 for instruction: %shl
; AVX2: Found an estimated cost of 2 for instruction: %shl
OpenPOWER on IntegriCloud