summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 4
-rw-r--r-- llvm/test/Analysis/CostModel/X86/arith.ll | 16
-rw-r--r-- llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll | 6
3 files changed, 16 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7029a02e682..5b3091eed3c 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -526,6 +526,10 @@ int X86TTIImpl::getArithmeticInstrCost(
// Two ops + 1 extract + 1 insert = 4.
{ ISD::MUL, MVT::v16i16, 4 },
{ ISD::MUL, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v32i8, 4 },
+ { ISD::ADD, MVT::v32i8, 4 },
+ { ISD::SUB, MVT::v16i16, 4 },
+ { ISD::ADD, MVT::v16i16, 4 },
{ ISD::SUB, MVT::v8i32, 4 },
{ ISD::ADD, MVT::v8i32, 4 },
{ ISD::SUB, MVT::v4i64, 4 },
diff --git a/llvm/test/Analysis/CostModel/X86/arith.ll b/llvm/test/Analysis/CostModel/X86/arith.ll
index 2f3f0f9aa73..aa204db3042 100644
--- a/llvm/test/Analysis/CostModel/X86/arith.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith.ll
@@ -57,13 +57,13 @@ define i32 @add(i32 %arg) {
%G = add <8 x i16> undef, undef
; SSSE3: cost of 2 {{.*}} %H = add
; SSE42: cost of 2 {{.*}} %H = add
- ; AVX: cost of 2 {{.*}} %H = add
+ ; AVX: cost of 4 {{.*}} %H = add
; AVX2: cost of 1 {{.*}} %H = add
; AVX512: cost of 1 {{.*}} %H = add
%H = add <16 x i16> undef, undef
; SSSE3: cost of 4 {{.*}} %I = add
; SSE42: cost of 4 {{.*}} %I = add
- ; AVX: cost of 4 {{.*}} %I = add
+ ; AVX: cost of 8 {{.*}} %I = add
; AVX2: cost of 2 {{.*}} %I = add
; AVX512F: cost of 2 {{.*}} %I = add
; AVX512BW: cost of 1 {{.*}} %I = add
@@ -77,13 +77,13 @@ define i32 @add(i32 %arg) {
%J = add <16 x i8> undef, undef
; SSSE3: cost of 2 {{.*}} %K = add
; SSE42: cost of 2 {{.*}} %K = add
- ; AVX: cost of 2 {{.*}} %K = add
+ ; AVX: cost of 4 {{.*}} %K = add
; AVX2: cost of 1 {{.*}} %K = add
; AVX512: cost of 1 {{.*}} %K = add
%K = add <32 x i8> undef, undef
; SSSE3: cost of 4 {{.*}} %L = add
; SSE42: cost of 4 {{.*}} %L = add
- ; AVX: cost of 4 {{.*}} %L = add
+ ; AVX: cost of 8 {{.*}} %L = add
; AVX2: cost of 2 {{.*}} %L = add
; AVX512F: cost of 2 {{.*}} %L = add
; AVX512BW: cost of 1 {{.*}} %L = add
@@ -140,13 +140,13 @@ define i32 @sub(i32 %arg) {
%G = sub <8 x i16> undef, undef
; SSSE3: cost of 2 {{.*}} %H = sub
; SSE42: cost of 2 {{.*}} %H = sub
- ; AVX: cost of 2 {{.*}} %H = sub
+ ; AVX: cost of 4 {{.*}} %H = sub
; AVX2: cost of 1 {{.*}} %H = sub
; AVX512: cost of 1 {{.*}} %H = sub
%H = sub <16 x i16> undef, undef
; SSSE3: cost of 4 {{.*}} %I = sub
; SSE42: cost of 4 {{.*}} %I = sub
- ; AVX: cost of 4 {{.*}} %I = sub
+ ; AVX: cost of 8 {{.*}} %I = sub
; AVX2: cost of 2 {{.*}} %I = sub
; AVX512F: cost of 2 {{.*}} %I = sub
; AVX512BW: cost of 1 {{.*}} %I = sub
@@ -160,13 +160,13 @@ define i32 @sub(i32 %arg) {
%J = sub <16 x i8> undef, undef
; SSSE3: cost of 2 {{.*}} %K = sub
; SSE42: cost of 2 {{.*}} %K = sub
- ; AVX: cost of 2 {{.*}} %K = sub
+ ; AVX: cost of 4 {{.*}} %K = sub
; AVX2: cost of 1 {{.*}} %K = sub
; AVX512: cost of 1 {{.*}} %K = sub
%K = sub <32 x i8> undef, undef
; SSSE3: cost of 4 {{.*}} %L = sub
; SSE42: cost of 4 {{.*}} %L = sub
- ; AVX: cost of 4 {{.*}} %L = sub
+ ; AVX: cost of 8 {{.*}} %L = sub
; AVX2: cost of 2 {{.*}} %L = sub
; AVX512F: cost of 2 {{.*}} %L = sub
; AVX512BW: cost of 1 {{.*}} %L = sub
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll b/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
index fe9d59efc8b..a32cc46e913 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
@@ -1,4 +1,5 @@
-; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -mcpu=corei7-avx -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
+; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -mcpu=corei7-avx -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-AVX1
+; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -mcpu=core-avx2 -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-AVX2
; REQUIRES: asserts
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -16,7 +17,8 @@ target triple = "x86_64-unknown-linux-gnu"
; -vectorizer-maximize-bandwidth is indicated.
;
; CHECK-label: foo
-; CHECK: LV: Selecting VF: 32.
+; CHECK-AVX1: LV: Selecting VF: 16.
+; CHECK-AVX2: LV: Selecting VF: 32.
define void @foo() {
entry:
br label %for.body
OpenPOWER on IntegriCloud