summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com> 2015-07-08 23:40:55 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2015-07-08 23:40:55 +0000
commit: 13194461958672e09a4b0608186c7793a6aaf241 (patch)
tree: 04f05efaf789d66363984fe07b0d4c978689a28c /llvm/test
parent: 91e85d4327856aa9bdb0236caeb31c95099afa65 (diff)
download: bcm5719-llvm-13194461958672e09a4b0608186c7793a6aaf241.tar.gz
download: bcm5719-llvm-13194461958672e09a4b0608186c7793a6aaf241.zip
[SLPVectorizer] Try different vectorization factors for store chains
...and set max vector register size based on target.

This patch is based on discussion on the llvmdev mailing list:
http://lists.cs.uiuc.edu/pipermail/llvmdev/2015-July/087405.html
and also solves:
https://llvm.org/bugs/show_bug.cgi?id=17170

Several FIXME/TODO items are noted in comments as potential improvements.

Differential Revision: http://reviews.llvm.org/D10950

llvm-svn: 241760
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll | 5
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/cse.ll | 29
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/gep.ll | 1
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll | 9
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll | 19
5 files changed, 39 insertions, 24 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll
index 9ed86f88147..35763953911 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll
@@ -1,4 +1,9 @@
; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s
+; XFAIL: *
+;
+; FIXME: If this test expects to be vectorized, the TTI must indicate that the target
+; has vector registers of the expected width.
+; Currently, it says there are 8 vector registers that are 32-bits wide.
target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
index 00e1ecd4ad3..8d25b3661dc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
@@ -12,11 +12,8 @@ target triple = "i386-apple-macosx10.8.0"
;CHECK-LABEL: @test(
;CHECK: load <2 x double>
-;CHECK: fadd <2 x double>
-;CHECK: store <2 x double>
-;CHECK: insertelement <2 x double>
-;CHECK: fadd <2 x double>
-;CHECK: store <2 x double>
+;CHECK: fadd <4 x double>
+;CHECK: store <4 x double>
;CHECK: ret i32
define i32 @test(double* nocapture %G) {
@@ -48,11 +45,12 @@ entry:
; A[2] = A[2] * 7.6 * n + 3.0;
; A[3] = A[3] * 7.4 * n + 4.0;
;}
-;CHECK-LABEL: @foo(
-;CHECK: insertelement <2 x double>
-;CHECK: insertelement <2 x double>
-;CHECK-NOT: insertelement <2 x double>
-;CHECK: ret
+; CHECK-LABEL: @foo(
+; CHECK: load <4 x double>
+; CHECK: fmul <4 x double>
+; CHECK: fmul <4 x double>
+; CHECK: fadd <4 x double>
+; CHECK: store <4 x double>
define i32 @foo(double* nocapture %A, i32 %n) {
entry:
%0 = load double, double* %A, align 8
@@ -140,11 +138,12 @@ define i32 @test2(double* nocapture %G, i32 %k) {
; A[2] = A[2] * 7.9 * n + 6.0;
; A[3] = A[3] * 7.9 * n + 6.0;
;}
-;CHECK-LABEL: @foo4(
-;CHECK: insertelement <2 x double>
-;CHECK: insertelement <2 x double>
-;CHECK-NOT: insertelement <2 x double>
-;CHECK: ret
+; CHECK-LABEL: @foo4(
+; CHECK: load <4 x double>
+; CHECK: fmul <4 x double>
+; CHECK: fmul <4 x double>
+; CHECK: fadd <4 x double>
+; CHECK: store <4 x double>
define i32 @foo4(double* nocapture %A, i32 %n) {
entry:
%0 = load double, double* %A, align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
index 3f952d7b242..d10f2b6015d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -basicaa -slp-vectorizer -S |FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
; Test if SLP can handle GEP expressions.
; The test perform the following action:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
index bbc315c24e1..dace4b35b87 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
@@ -4,12 +4,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.8.0"
;CHECK-LABEL: @foo(
-;CHECK: load <4 x i32>
-;CHECK: add nsw <4 x i32>
-;CHECK: store <4 x i32>
-;CHECK: load <4 x i32>
-;CHECK: add nsw <4 x i32>
-;CHECK: store <4 x i32>
+;CHECK: load <8 x i32>
+;CHECK: add nsw <8 x i32>
+;CHECK: store <8 x i32>
;CHECK: ret
define i32 @foo(i32* nocapture %A, i32 %n) {
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll
index 0e404b2ad7b..32f8da4c7ee 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll
@@ -1,11 +1,24 @@
; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-max-reg-size=128 -S -mcpu=corei7-avx | FileCheck %s --check-prefix=V128
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-; CHECK: load <2 x double>, <2 x double>*
-; CHECK: fadd <2 x double>
-; CHECK: store <2 x double>
+; CHECK-LABEL: @foo(
+; CHECK: load <4 x double>
+; CHECK: fadd <4 x double>
+; CHECK: fadd <4 x double>
+; CHECK: store <4 x double>
+
+; V128-LABEL: @foo(
+; V128: load <2 x double>
+; V128: fadd <2 x double>
+; V128: fadd <2 x double>
+; V128: store <2 x double>
+; V128: load <2 x double>
+; V128: fadd <2 x double>
+; V128: fadd <2 x double>
+; V128: store <2 x double>
define void @foo(double* %x) {
%1 = load double, double* %x, align 8
OpenPOWER on IntegriCloud