summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoadStoreVectorizer/X86
diff options
context:
space:
mode:
authorEric Christopher <echristo@gmail.com>2019-04-17 02:12:23 +0000
committerEric Christopher <echristo@gmail.com>2019-04-17 02:12:23 +0000
commita86343512845c9c1fdbac865fea88aa5fce7142a (patch)
tree666fc6353de19ad8b00e56b67edd33f24104e4a7 /llvm/test/Transforms/LoadStoreVectorizer/X86
parent7f8ca6e3679b3af951cb7a4b1377edfaa3244b93 (diff)
downloadbcm5719-llvm-a86343512845c9c1fdbac865fea88aa5fce7142a.tar.gz
bcm5719-llvm-a86343512845c9c1fdbac865fea88aa5fce7142a.zip
Temporarily Revert "Add basic loop fusion pass."
As it's causing some bot failures (and per request from kbarton). This reverts commit r358543/ab70da07286e618016e78247e4a24fcb84077fda. llvm-svn: 358546
Diffstat (limited to 'llvm/test/Transforms/LoadStoreVectorizer/X86')
-rw-r--r--llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll80
-rw-r--r--llvm/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll77
-rw-r--r--llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll28
-rw-r--r--llvm/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg3
-rw-r--r--llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll40
-rw-r--r--llvm/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll48
-rw-r--r--llvm/test/Transforms/LoadStoreVectorizer/X86/non-byte-size.ll31
-rw-r--r--llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll29
-rw-r--r--llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll78
-rw-r--r--llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll118
-rw-r--r--llvm/test/Transforms/LoadStoreVectorizer/X86/vector-scalar.ll15
11 files changed, 0 insertions, 547 deletions
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll
deleted file mode 100644
index e29f3dfa537..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; RUN: opt -codegenprepare -load-store-vectorizer %s -S -o - | FileCheck %s
-; RUN: opt -load-store-vectorizer %s -S -o - | FileCheck %s
-; RUN: opt -codegenprepare -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' %s -S -o - | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' %s -S -o - | FileCheck %s
-
-target triple = "x86_64--"
-
-%union = type { { [4 x [4 x [4 x [16 x float]]]], [4 x [4 x [4 x [16 x float]]]], [10 x [10 x [4 x float]]] } }
-
-@global_pointer = external unnamed_addr global { %union, [2000 x i8] }, align 4
-
-; Function Attrs: convergent nounwind
-define void @test(i32 %base) #0 {
-; CHECK-LABEL: @test(
-; CHECK-NOT: load i32
-; CHECK: load <2 x i32>
-; CHECK-NOT: load i32
-entry:
- %mul331 = and i32 %base, -4
- %add350.4 = add i32 4, %mul331
- %idx351.4 = zext i32 %add350.4 to i64
- %arrayidx352.4 = getelementptr inbounds { %union, [2000 x i8] }, { %union, [2000 x i8] }* @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.4
- %tmp296.4 = bitcast float* %arrayidx352.4 to i32*
- %add350.5 = add i32 5, %mul331
- %idx351.5 = zext i32 %add350.5 to i64
- %arrayidx352.5 = getelementptr inbounds { %union, [2000 x i8] }, { %union, [2000 x i8] }* @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.5
- %tmp296.5 = bitcast float* %arrayidx352.5 to i32*
- %cnd = icmp ult i32 %base, 1000
- br i1 %cnd, label %loads, label %exit
-
-loads:
- ; If and only if the loads are in a different BB from the GEPs codegenprepare
- ; would try to turn the GEPs into math, which makes LoadStoreVectorizer's job
- ; harder
- %tmp297.4 = load i32, i32* %tmp296.4, align 4, !tbaa !0
- %tmp297.5 = load i32, i32* %tmp296.5, align 4, !tbaa !0
- br label %exit
-
-exit:
- ret void
-}
-
-; Function Attrs: convergent nounwind
-define void @test.codegenprepared(i32 %base) #0 {
-; CHECK-LABEL: @test.codegenprepared(
-; CHECK-NOT: load i32
-; CHECK: load <2 x i32>
-; CHECK-NOT: load i32
-entry:
- %mul331 = and i32 %base, -4
- %add350.4 = add i32 4, %mul331
- %idx351.4 = zext i32 %add350.4 to i64
- %add350.5 = add i32 5, %mul331
- %idx351.5 = zext i32 %add350.5 to i64
- %cnd = icmp ult i32 %base, 1000
- br i1 %cnd, label %loads, label %exit
-
-loads: ; preds = %entry
- %sunkaddr = mul i64 %idx351.4, 4
- %sunkaddr1 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @global_pointer to i8*), i64 %sunkaddr
- %sunkaddr2 = getelementptr inbounds i8, i8* %sunkaddr1, i64 4096
- %0 = bitcast i8* %sunkaddr2 to i32*
- %tmp297.4 = load i32, i32* %0, align 4, !tbaa !0
- %sunkaddr3 = mul i64 %idx351.5, 4
- %sunkaddr4 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @global_pointer to i8*), i64 %sunkaddr3
- %sunkaddr5 = getelementptr inbounds i8, i8* %sunkaddr4, i64 4096
- %1 = bitcast i8* %sunkaddr5 to i32*
- %tmp297.5 = load i32, i32* %1, align 4, !tbaa !0
- br label %exit
-
-exit: ; preds = %loads, %entry
- ret void
-}
-
-attributes #0 = { convergent nounwind }
-
-!0 = !{!1, !1, i64 0}
-!1 = !{!"float", !2, i64 0}
-!2 = !{!"omnipotent char", !3, i64 0}
-!3 = !{!"Simple C++ TBAA"}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll
deleted file mode 100644
index e2181f6086c..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; RUN: opt -load-store-vectorizer %s -S | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' %s -S | FileCheck %s
-
-; Check that setting wrapping flags after a SCEV node is created
-; does not invalidate "sorted by complexity" invariant for
-; operands of commutative and associative SCEV operators.
-
-target triple = "x86_64--"
-
-@global_value0 = external constant i32
-@global_value1 = external constant i32
-@other_value = external global float
-@a = external global float
-@b = external global float
-@c = external global float
-@d = external global float
-@plus1 = external global i32
-@cnd = external global i8
-
-; Function Attrs: nounwind
-define void @main() local_unnamed_addr #0 {
-; CHECK-LABEL: @main()
-; CHECK: [[PTR:%[0-9]+]] = bitcast float* %preheader.load0.address to <2 x float>*
-; CHECK: = load <2 x float>, <2 x float>* [[PTR]]
-; CHECK-LABEL: for.body23:
-entry:
- %tmp = load i32, i32* @global_value0, !range !0
- %tmp2 = load i32, i32* @global_value1
- %and.i.i = and i32 %tmp2, 2
- %add.nuw.nsw.i.i = add nuw nsw i32 %and.i.i, 0
- %mul.i.i = shl nuw nsw i32 %add.nuw.nsw.i.i, 1
- %and6.i.i = and i32 %tmp2, 3
- %and9.i.i = and i32 %tmp2, 4
- %add.nuw.nsw10.i.i = add nuw nsw i32 %and6.i.i, %and9.i.i
- %conv3.i42.i = add nuw nsw i32 %mul.i.i, 1
- %reass.add346.7 = add nuw nsw i32 %add.nuw.nsw10.i.i, 56
- %reass.mul347.7 = mul nuw nsw i32 %tmp, %reass.add346.7
- %add7.i.7 = add nuw nsw i32 %reass.mul347.7, 0
- %preheader.address0.idx = add nuw nsw i32 %add7.i.7, %mul.i.i
- %preheader.address0.idx.zext = zext i32 %preheader.address0.idx to i64
- %preheader.load0.address = getelementptr inbounds float, float* @other_value, i64 %preheader.address0.idx.zext
- %preheader.load0. = load float, float* %preheader.load0.address, align 4, !tbaa !1
- %common.address.idx = add nuw nsw i32 %add7.i.7, %conv3.i42.i
- %preheader.header.common.address.idx.zext = zext i32 %common.address.idx to i64
- %preheader.load1.address = getelementptr inbounds float, float* @other_value, i64 %preheader.header.common.address.idx.zext
- %preheader.load1. = load float, float* %preheader.load1.address, align 4, !tbaa !1
- br label %for.body23
-
-for.body23: ; preds = %for.body23, %entry
- %loop.header.load0.address = getelementptr inbounds float, float* @other_value, i64 %preheader.header.common.address.idx.zext
- %loop.header.load0. = load float, float* %loop.header.load0.address, align 4, !tbaa !1
- %reass.mul343.7 = mul nuw nsw i32 %reass.add346.7, 72
- %add7.i286.7.7 = add nuw nsw i32 %reass.mul343.7, 56
- %add9.i288.7.7 = add nuw nsw i32 %add7.i286.7.7, %mul.i.i
- %loop.header.address1.idx = add nuw nsw i32 %add9.i288.7.7, 1
- %loop.header.address1.idx.zext = zext i32 %loop.header.address1.idx to i64
- %loop.header.load1.address = getelementptr inbounds float, float* @other_value, i64 %loop.header.address1.idx.zext
- %loop.header.load1. = load float, float* %loop.header.load1.address, align 4, !tbaa !1
- store float %preheader.load0., float* @a, align 4, !tbaa !1
- store float %preheader.load1., float* @b, align 4, !tbaa !1
- store float %loop.header.load0., float* @c, align 4, !tbaa !1
- store float %loop.header.load1., float* @d, align 4, !tbaa !1
- %loaded.cnd = load i8, i8* @cnd
- %condition = trunc i8 %loaded.cnd to i1
- br i1 %condition, label %for.body23, label %exit
-
-exit:
- ret void
-}
-
-attributes #0 = { nounwind }
-
-!0 = !{i32 0, i32 65536}
-!1 = !{!2, !2, i64 0}
-!2 = !{!"float", !3, i64 0}
-!3 = !{!"omnipotent char", !4, i64 0}
-!4 = !{!"Simple C++ TBAA"}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
deleted file mode 100644
index 043d6ea7e92..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-
-; CHECK-LABEL: @correct_order(
-; CHECK: [[LOAD_PTR:%[0-9]+]] = bitcast i32* %next.gep1
-; CHECK: load <2 x i32>, <2 x i32>* [[LOAD_PTR]]
-; CHECK: load i32, i32* %next.gep
-; CHECK: [[STORE_PTR:%[0-9]+]] = bitcast i32* %next.gep
-; CHECK: store <2 x i32>
-; CHECK-SAME: <2 x i32>* [[STORE_PTR]]
-; CHECK: load i32, i32* %next.gep1
-define void @correct_order(i32* noalias %ptr) {
- %next.gep = getelementptr i32, i32* %ptr, i64 0
- %next.gep1 = getelementptr i32, i32* %ptr, i64 1
- %next.gep2 = getelementptr i32, i32* %ptr, i64 2
-
- %l1 = load i32, i32* %next.gep1, align 4
- %l2 = load i32, i32* %next.gep, align 4
- store i32 0, i32* %next.gep1, align 4
- store i32 0, i32* %next.gep, align 4
- %l3 = load i32, i32* %next.gep1, align 4
- %l4 = load i32, i32* %next.gep2, align 4
-
- ret void
-}
-
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg b/llvm/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg
deleted file mode 100644
index e71f3cc4c41..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-if not 'X86' in config.root.targets:
- config.unsupported = True
-
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll
deleted file mode 100644
index ac5f3ea9f0f..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
-
-define <8 x double> @loadwidth_insert_extract(double* %ptr) {
- %a = bitcast double* %ptr to <2 x double> *
- %b = getelementptr <2 x double>, <2 x double>* %a, i32 1
- %c = getelementptr <2 x double>, <2 x double>* %a, i32 2
- %d = getelementptr <2 x double>, <2 x double>* %a, i32 3
-; CHECK-HSW: load <4 x double>
-; CHECK-HSW: load <4 x double>
-; CHECK-HSW-NOT: load
-; CHECK-KNL: load <8 x double>
-; CHECK-KNL-NOT: load
- %la = load <2 x double>, <2 x double> *%a
- %lb = load <2 x double>, <2 x double> *%b
- %lc = load <2 x double>, <2 x double> *%c
- %ld = load <2 x double>, <2 x double> *%d
- ; Scalarize everything - Explicitly not a shufflevector to test this code
- ; path in the LSV
- %v1 = extractelement <2 x double> %la, i32 0
- %v2 = extractelement <2 x double> %la, i32 1
- %v3 = extractelement <2 x double> %lb, i32 0
- %v4 = extractelement <2 x double> %lb, i32 1
- %v5 = extractelement <2 x double> %lc, i32 0
- %v6 = extractelement <2 x double> %lc, i32 1
- %v7 = extractelement <2 x double> %ld, i32 0
- %v8 = extractelement <2 x double> %ld, i32 1
- ; Make a vector again
- %i1 = insertelement <8 x double> undef, double %v1, i32 0
- %i2 = insertelement <8 x double> %i1, double %v2, i32 1
- %i3 = insertelement <8 x double> %i2, double %v3, i32 2
- %i4 = insertelement <8 x double> %i3, double %v4, i32 3
- %i5 = insertelement <8 x double> %i4, double %v5, i32 4
- %i6 = insertelement <8 x double> %i5, double %v6, i32 5
- %i7 = insertelement <8 x double> %i6, double %v7, i32 6
- %i8 = insertelement <8 x double> %i7, double %v8, i32 7
- ret <8 x double> %i8
-}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll
deleted file mode 100644
index a93e9aceb73..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S < %s | \
-; RUN: FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S < %s | \
-; RUN: FileCheck %s
-;
-; The GPU Load & Store Vectorizer may merge differently-typed accesses into a
-; single instruction. This test checks that we merge TBAA tags for such
-; accesses correctly.
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; struct S {
-; float f;
-; int i;
-; };
-%struct.S = type { float, i32 }
-
-; float foo(S *p) {
-; p->f -= 1;
-; p->i -= 1;
-; return p->f;
-; }
-define float @foo(%struct.S* %p) {
-entry:
-; CHECK-LABEL: foo
-; CHECK: load <2 x i32>, {{.*}}, !tbaa [[TAG_char:!.*]]
-; CHECK: store <2 x i32> {{.*}}, !tbaa [[TAG_char]]
- %f = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0
- %0 = load float, float* %f, align 4, !tbaa !2
- %sub = fadd float %0, -1.000000e+00
- store float %sub, float* %f, align 4, !tbaa !2
- %i = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1
- %1 = load i32, i32* %i, align 4, !tbaa !8
- %sub1 = add nsw i32 %1, -1
- store i32 %sub1, i32* %i, align 4, !tbaa !8
- ret float %sub
-}
-
-!2 = !{!3, !4, i64 0}
-!3 = !{!"_ZTS1S", !4, i64 0, !7, i64 4}
-!4 = !{!"float", !5, i64 0}
-!5 = !{!"omnipotent char", !6, i64 0}
-!6 = !{!"Simple C++ TBAA"}
-!7 = !{!"int", !5, i64 0}
-!8 = !{!3, !7, i64 4}
-
-; CHECK-DAG: [[TYPE_char:!.*]] = !{!"omnipotent char", {{.*}}, i64 0}
-; CHECK-DAG: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/non-byte-size.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/non-byte-size.ll
deleted file mode 100644
index 7a0073808a0..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/non-byte-size.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -load-store-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
-
-%rec = type { i32, i28 }
-
-; We currently do not optimize this scenario.
-; But we verify that we no longer crash when compiling this.
-define void @test1(%rec* %out, %rec* %in) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: [[IN1:%.*]] = getelementptr [[REC:%.*]], %rec* [[IN:%.*]], i16 0, i32 0
-; CHECK-NEXT: [[IN2:%.*]] = getelementptr [[REC]], %rec* [[IN]], i16 0, i32 1
-; CHECK-NEXT: [[VAL1:%.*]] = load i32, i32* [[IN1]], align 8
-; CHECK-NEXT: [[VAL2:%.*]] = load i28, i28* [[IN2]]
-; CHECK-NEXT: [[OUT1:%.*]] = getelementptr [[REC]], %rec* [[OUT:%.*]], i16 0, i32 0
-; CHECK-NEXT: [[OUT2:%.*]] = getelementptr [[REC]], %rec* [[OUT]], i16 0, i32 1
-; CHECK-NEXT: store i32 [[VAL1]], i32* [[OUT1]], align 8
-; CHECK-NEXT: store i28 [[VAL2]], i28* [[OUT2]]
-; CHECK-NEXT: ret void
-;
- %in1 = getelementptr %rec, %rec* %in, i16 0, i32 0
- %in2 = getelementptr %rec, %rec* %in, i16 0, i32 1
- %val1 = load i32, i32* %in1, align 8
- %val2 = load i28, i28* %in2
- %out1 = getelementptr %rec, %rec* %out, i16 0, i32 0
- %out2 = getelementptr %rec, %rec* %out, i16 0, i32 1
- store i32 %val1, i32* %out1, align 8
- store i28 %val2, i28* %out2
- ret void
-}
-
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll
deleted file mode 100644
index 3cfe7454baf..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux -load-store-vectorizer -S -o - %s | FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
-
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-
-%struct.buffer_t = type { i32, i8* }
-
-; Check an i32 and i8* get vectorized, and that the two accesses
-; (load into buff.val and store to buff.p) preserve their order.
-; Vectorized loads should be inserted at the position of the first load,
-; and instructions which were between the first and last load should be
-; reordered preserving their relative order inasmuch as possible.
-
-; CHECK-LABEL: @preserve_order_32(
-; CHECK: load <2 x i32>
-; CHECK: %buff.val = load i8
-; CHECK: store i8 0
-define void @preserve_order_32(%struct.buffer_t* noalias %buff) #0 {
-entry:
- %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 1
- %buff.p = load i8*, i8** %tmp1
- %buff.val = load i8, i8* %buff.p
- store i8 0, i8* %buff.p, align 8
- %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 0
- %buff.int = load i32, i32* %tmp0, align 8
- ret void
-}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll
deleted file mode 100644
index 3ae0d891dc5..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-
-%struct.buffer_t = type { i64, i8* }
-%struct.nested.buffer = type { %struct.buffer_t, %struct.buffer_t }
-
-; Check an i64 and i8* get vectorized, and that the two accesses
-; (load into buff.val and store to buff.p) preserve their order.
-; Vectorized loads should be inserted at the position of the first load,
-; and instructions which were between the first and last load should be
-; reordered preserving their relative order inasmuch as possible.
-
-; CHECK-LABEL: @preserve_order_64(
-; CHECK: load <2 x i64>
-; CHECK: %buff.val = load i8
-; CHECK: store i8 0
-define void @preserve_order_64(%struct.buffer_t* noalias %buff) #0 {
-entry:
- %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
- %buff.p = load i8*, i8** %tmp1
- %buff.val = load i8, i8* %buff.p
- store i8 0, i8* %buff.p, align 8
- %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
- %buff.int = load i64, i64* %tmp0, align 16
- ret void
-}
-
-; Check reordering recurses correctly.
-
-; CHECK-LABEL: @transitive_reorder(
-; CHECK: load <2 x i64>
-; CHECK: %buff.val = load i8
-; CHECK: store i8 0
-define void @transitive_reorder(%struct.buffer_t* noalias %buff, %struct.nested.buffer* noalias %nest) #0 {
-entry:
- %nest0_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
- %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest0_0, i64 0, i32 1
- %buff.p = load i8*, i8** %tmp1
- %buff.val = load i8, i8* %buff.p
- store i8 0, i8* %buff.p, align 8
- %nest1_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
- %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest1_0, i64 0, i32 0
- %buff.int = load i64, i64* %tmp0, align 16
- ret void
-}
-
-; Check for no vectorization over phi node
-
-; CHECK-LABEL: @no_vect_phi(
-; CHECK: load i8*
-; CHECK: load i8
-; CHECK: store i8 0
-; CHECK: load i64
-define void @no_vect_phi(i32* noalias %ptr, %struct.buffer_t* noalias %buff) {
-entry:
- %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
- %buff.p = load i8*, i8** %tmp1
- %buff.val = load i8, i8* %buff.p
- store i8 0, i8* %buff.p, align 8
- br label %"for something"
-
-"for something":
- %index = phi i64 [ 0, %entry ], [ %index.next, %"for something" ]
-
- %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
- %buff.int = load i64, i64* %tmp0, align 16
-
- %index.next = add i64 %index, 8
- %cmp_res = icmp eq i64 %index.next, 8
- br i1 %cmp_res, label %ending, label %"for something"
-
-ending:
- ret void
-}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
deleted file mode 100644
index 72b29912d81..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
+++ /dev/null
@@ -1,118 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-
-; Vectorized subsets of the load/store chains in the presence of
-; interleaved loads/stores
-
-; CHECK-LABEL: @interleave_2L_2S(
-; CHECK: load <2 x i32>
-; CHECK: load i32
-; CHECK: store <2 x i32>
-; CHECK: load i32
-define void @interleave_2L_2S(i32* noalias %ptr) {
- %next.gep = getelementptr i32, i32* %ptr, i64 0
- %next.gep1 = getelementptr i32, i32* %ptr, i64 1
- %next.gep2 = getelementptr i32, i32* %ptr, i64 2
-
- %l1 = load i32, i32* %next.gep1, align 4
- %l2 = load i32, i32* %next.gep, align 4
- store i32 0, i32* %next.gep1, align 4
- store i32 0, i32* %next.gep, align 4
- %l3 = load i32, i32* %next.gep1, align 4
- %l4 = load i32, i32* %next.gep2, align 4
-
- ret void
-}
-
-; CHECK-LABEL: @interleave_3L_2S_1L(
-; CHECK: load <3 x i32>
-; CHECK: store <2 x i32>
-; CHECK: load i32
-
-define void @interleave_3L_2S_1L(i32* noalias %ptr) {
- %next.gep = getelementptr i32, i32* %ptr, i64 0
- %next.gep1 = getelementptr i32, i32* %ptr, i64 1
- %next.gep2 = getelementptr i32, i32* %ptr, i64 2
-
- %l2 = load i32, i32* %next.gep, align 4
- %l1 = load i32, i32* %next.gep1, align 4
- store i32 0, i32* %next.gep1, align 4
- store i32 0, i32* %next.gep, align 4
- %l3 = load i32, i32* %next.gep1, align 4
- %l4 = load i32, i32* %next.gep2, align 4
-
- ret void
-}
-
-; CHECK-LABEL: @chain_suffix(
-; CHECK: load i32
-; CHECK: store <2 x i32>
-; CHECK: load <2 x i32>
-define void @chain_suffix(i32* noalias %ptr) {
- %next.gep = getelementptr i32, i32* %ptr, i64 0
- %next.gep1 = getelementptr i32, i32* %ptr, i64 1
- %next.gep2 = getelementptr i32, i32* %ptr, i64 2
-
- %l2 = load i32, i32* %next.gep, align 4
- store i32 0, i32* %next.gep1, align 4
- store i32 0, i32* %next.gep, align 4
- %l3 = load i32, i32* %next.gep1, align 4
- %l4 = load i32, i32* %next.gep2, align 4
-
- ret void
-}
-
-
-; CHECK-LABEL: @chain_prefix_suffix(
-; CHECK: load <2 x i32>
-; CHECK: store <2 x i32>
-; CHECK: load <3 x i32>
-define void @chain_prefix_suffix(i32* noalias %ptr) {
- %next.gep = getelementptr i32, i32* %ptr, i64 0
- %next.gep1 = getelementptr i32, i32* %ptr, i64 1
- %next.gep2 = getelementptr i32, i32* %ptr, i64 2
- %next.gep3 = getelementptr i32, i32* %ptr, i64 3
-
- %l1 = load i32, i32* %next.gep, align 4
- %l2 = load i32, i32* %next.gep1, align 4
- store i32 0, i32* %next.gep1, align 4
- store i32 0, i32* %next.gep2, align 4
- %l3 = load i32, i32* %next.gep1, align 4
- %l4 = load i32, i32* %next.gep2, align 4
- %l5 = load i32, i32* %next.gep3, align 4
-
- ret void
-}
-
-; FIXME: If the chain is too long and TLI says misaligned is not fast,
-; then LSV fails to vectorize anything in that chain.
-; To reproduce below, add a tmp5 (ptr+4) and load tmp5 into l6 and l7.
-
-; CHECK-LABEL: @interleave_get_longest
-; CHECK: load <3 x i32>
-; CHECK: load i32
-; CHECK: store <2 x i32> zeroinitializer
-; CHECK: load i32
-; CHECK: load i32
-; CHECK: load i32
-
-define void @interleave_get_longest(i32* noalias %ptr) {
- %tmp1 = getelementptr i32, i32* %ptr, i64 0
- %tmp2 = getelementptr i32, i32* %ptr, i64 1
- %tmp3 = getelementptr i32, i32* %ptr, i64 2
- %tmp4 = getelementptr i32, i32* %ptr, i64 3
-
- %l1 = load i32, i32* %tmp2, align 4
- %l2 = load i32, i32* %tmp1, align 4
- store i32 0, i32* %tmp2, align 4
- store i32 0, i32* %tmp1, align 4
- %l3 = load i32, i32* %tmp2, align 4
- %l4 = load i32, i32* %tmp3, align 4
- %l5 = load i32, i32* %tmp4, align 4
- %l6 = load i32, i32* %tmp4, align 4
- %l7 = load i32, i32* %tmp4, align 4
-
- ret void
-}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/vector-scalar.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/vector-scalar.ll
deleted file mode 100644
index 00971f35038..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/vector-scalar.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu haswell -S -o - %s | FileCheck %s
-
-; Check that the LoadStoreVectorizer does not crash due to not differentiating <1 x T> and T.
-
-; CHECK-LABEL: @vector_scalar(
-; CHECK: store double
-; CHECK: store <1 x double>
-define void @vector_scalar(double* %ptr, double %a, <1 x double> %b) {
- %1 = bitcast double* %ptr to <1 x double>*
- %2 = getelementptr <1 x double>, <1 x double>* %1, i32 1
- store double %a, double* %ptr, align 8
- store <1 x double> %b, <1 x double>* %2, align 8
- ret void
-}
OpenPOWER on IntegriCloud