| author | Eric Christopher <echristo@gmail.com> | 2019-04-17 02:12:23 +0000 |
|---|---|---|
| committer | Eric Christopher <echristo@gmail.com> | 2019-04-17 02:12:23 +0000 |
| commit | a86343512845c9c1fdbac865fea88aa5fce7142a | |
| tree | 666fc6353de19ad8b00e56b67edd33f24104e4a7 | /llvm/test/Transforms/LoadStoreVectorizer/X86 |
| parent | 7f8ca6e3679b3af951cb7a4b1377edfaa3244b93 | |
Temporarily Revert "Add basic loop fusion pass."
As it's causing some bot failures (and per request from kbarton).
This reverts commit r358543/ab70da07286e618016e78247e4a24fcb84077fda.
llvm-svn: 358546
Diffstat (limited to 'llvm/test/Transforms/LoadStoreVectorizer/X86')

| file | deletions |
|---|---|
| llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll | 80 |
| llvm/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll | 77 |
| llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll | 28 |
| llvm/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg | 3 |
| llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll | 40 |
| llvm/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll | 48 |
| llvm/test/Transforms/LoadStoreVectorizer/X86/non-byte-size.ll | 31 |
| llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll | 29 |
| llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll | 78 |
| llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll | 118 |
| llvm/test/Transforms/LoadStoreVectorizer/X86/vector-scalar.ll | 15 |

11 files changed, 0 insertions(+), 547 deletions(-)
```diff
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll
deleted file mode 100644
index e29f3dfa537..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; RUN: opt -codegenprepare -load-store-vectorizer %s -S -o - | FileCheck %s
-; RUN: opt -load-store-vectorizer %s -S -o - | FileCheck %s
-; RUN: opt -codegenprepare -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' %s -S -o - | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' %s -S -o - | FileCheck %s
-
-target triple = "x86_64--"
-
-%union = type { { [4 x [4 x [4 x [16 x float]]]], [4 x [4 x [4 x [16 x float]]]], [10 x [10 x [4 x float]]] } }
-
-@global_pointer = external unnamed_addr global { %union, [2000 x i8] }, align 4
-
-; Function Attrs: convergent nounwind
-define void @test(i32 %base) #0 {
-; CHECK-LABEL: @test(
-; CHECK-NOT: load i32
-; CHECK: load <2 x i32>
-; CHECK-NOT: load i32
-entry:
-  %mul331 = and i32 %base, -4
-  %add350.4 = add i32 4, %mul331
-  %idx351.4 = zext i32 %add350.4 to i64
-  %arrayidx352.4 = getelementptr inbounds { %union, [2000 x i8] }, { %union, [2000 x i8] }* @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.4
-  %tmp296.4 = bitcast float* %arrayidx352.4 to i32*
-  %add350.5 = add i32 5, %mul331
-  %idx351.5 = zext i32 %add350.5 to i64
-  %arrayidx352.5 = getelementptr inbounds { %union, [2000 x i8] }, { %union, [2000 x i8] }* @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.5
-  %tmp296.5 = bitcast float* %arrayidx352.5 to i32*
-  %cnd = icmp ult i32 %base, 1000
-  br i1 %cnd, label %loads, label %exit
-
-loads:
-  ; If and only if the loads are in a different BB from the GEPs codegenprepare
-  ; would try to turn the GEPs into math, which makes LoadStoreVectorizer's job
-  ; harder
-  %tmp297.4 = load i32, i32* %tmp296.4, align 4, !tbaa !0
-  %tmp297.5 = load i32, i32* %tmp296.5, align 4, !tbaa !0
-  br label %exit
-
-exit:
-  ret void
-}
-
-; Function Attrs: convergent nounwind
-define void @test.codegenprepared(i32 %base) #0 {
-; CHECK-LABEL: @test.codegenprepared(
-; CHECK-NOT: load i32
-; CHECK: load <2 x i32>
-; CHECK-NOT: load i32
-entry:
-  %mul331 = and i32 %base, -4
-  %add350.4 = add i32 4, %mul331
-  %idx351.4 = zext i32 %add350.4 to i64
-  %add350.5 = add i32 5, %mul331
-  %idx351.5 = zext i32 %add350.5 to i64
-  %cnd = icmp ult i32 %base, 1000
-  br i1 %cnd, label %loads, label %exit
-
-loads: ; preds = %entry
-  %sunkaddr = mul i64 %idx351.4, 4
-  %sunkaddr1 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @global_pointer to i8*), i64 %sunkaddr
-  %sunkaddr2 = getelementptr inbounds i8, i8* %sunkaddr1, i64 4096
-  %0 = bitcast i8* %sunkaddr2 to i32*
-  %tmp297.4 = load i32, i32* %0, align 4, !tbaa !0
-  %sunkaddr3 = mul i64 %idx351.5, 4
-  %sunkaddr4 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @global_pointer to i8*), i64 %sunkaddr3
-  %sunkaddr5 = getelementptr inbounds i8, i8* %sunkaddr4, i64 4096
-  %1 = bitcast i8* %sunkaddr5 to i32*
-  %tmp297.5 = load i32, i32* %1, align 4, !tbaa !0
-  br label %exit
-
-exit: ; preds = %loads, %entry
-  ret void
-}
-
-attributes #0 = { convergent nounwind }
-
-!0 = !{!1, !1, i64 0}
-!1 = !{!"float", !2, i64 0}
-!2 = !{!"omnipotent char", !3, i64 0}
-!3 = !{!"Simple C++ TBAA"}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll
deleted file mode 100644
index e2181f6086c..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; RUN: opt -load-store-vectorizer %s -S | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' %s -S | FileCheck %s
-
-; Check that setting wrapping flags after a SCEV node is created
-; does not invalidate "sorted by complexity" invariant for
-; operands of commutative and associative SCEV operators.
-
-target triple = "x86_64--"
-
-@global_value0 = external constant i32
-@global_value1 = external constant i32
-@other_value = external global float
-@a = external global float
-@b = external global float
-@c = external global float
-@d = external global float
-@plus1 = external global i32
-@cnd = external global i8
-
-; Function Attrs: nounwind
-define void @main() local_unnamed_addr #0 {
-; CHECK-LABEL: @main()
-; CHECK: [[PTR:%[0-9]+]] = bitcast float* %preheader.load0.address to <2 x float>*
-; CHECK: = load <2 x float>, <2 x float>* [[PTR]]
-; CHECK-LABEL: for.body23:
-entry:
-  %tmp = load i32, i32* @global_value0, !range !0
-  %tmp2 = load i32, i32* @global_value1
-  %and.i.i = and i32 %tmp2, 2
-  %add.nuw.nsw.i.i = add nuw nsw i32 %and.i.i, 0
-  %mul.i.i = shl nuw nsw i32 %add.nuw.nsw.i.i, 1
-  %and6.i.i = and i32 %tmp2, 3
-  %and9.i.i = and i32 %tmp2, 4
-  %add.nuw.nsw10.i.i = add nuw nsw i32 %and6.i.i, %and9.i.i
-  %conv3.i42.i = add nuw nsw i32 %mul.i.i, 1
-  %reass.add346.7 = add nuw nsw i32 %add.nuw.nsw10.i.i, 56
-  %reass.mul347.7 = mul nuw nsw i32 %tmp, %reass.add346.7
-  %add7.i.7 = add nuw nsw i32 %reass.mul347.7, 0
-  %preheader.address0.idx = add nuw nsw i32 %add7.i.7, %mul.i.i
-  %preheader.address0.idx.zext = zext i32 %preheader.address0.idx to i64
-  %preheader.load0.address = getelementptr inbounds float, float* @other_value, i64 %preheader.address0.idx.zext
-  %preheader.load0. = load float, float* %preheader.load0.address, align 4, !tbaa !1
-  %common.address.idx = add nuw nsw i32 %add7.i.7, %conv3.i42.i
-  %preheader.header.common.address.idx.zext = zext i32 %common.address.idx to i64
-  %preheader.load1.address = getelementptr inbounds float, float* @other_value, i64 %preheader.header.common.address.idx.zext
-  %preheader.load1. = load float, float* %preheader.load1.address, align 4, !tbaa !1
-  br label %for.body23
-
-for.body23: ; preds = %for.body23, %entry
-  %loop.header.load0.address = getelementptr inbounds float, float* @other_value, i64 %preheader.header.common.address.idx.zext
-  %loop.header.load0. = load float, float* %loop.header.load0.address, align 4, !tbaa !1
-  %reass.mul343.7 = mul nuw nsw i32 %reass.add346.7, 72
-  %add7.i286.7.7 = add nuw nsw i32 %reass.mul343.7, 56
-  %add9.i288.7.7 = add nuw nsw i32 %add7.i286.7.7, %mul.i.i
-  %loop.header.address1.idx = add nuw nsw i32 %add9.i288.7.7, 1
-  %loop.header.address1.idx.zext = zext i32 %loop.header.address1.idx to i64
-  %loop.header.load1.address = getelementptr inbounds float, float* @other_value, i64 %loop.header.address1.idx.zext
-  %loop.header.load1. = load float, float* %loop.header.load1.address, align 4, !tbaa !1
-  store float %preheader.load0., float* @a, align 4, !tbaa !1
-  store float %preheader.load1., float* @b, align 4, !tbaa !1
-  store float %loop.header.load0., float* @c, align 4, !tbaa !1
-  store float %loop.header.load1., float* @d, align 4, !tbaa !1
-  %loaded.cnd = load i8, i8* @cnd
-  %condition = trunc i8 %loaded.cnd to i1
-  br i1 %condition, label %for.body23, label %exit
-
-exit:
-  ret void
-}
-
-attributes #0 = { nounwind }
-
-!0 = !{i32 0, i32 65536}
-!1 = !{!2, !2, i64 0}
-!2 = !{!"float", !3, i64 0}
-!3 = !{!"omnipotent char", !4, i64 0}
-!4 = !{!"Simple C++ TBAA"}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
deleted file mode 100644
index 043d6ea7e92..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-
-; CHECK-LABEL: @correct_order(
-; CHECK: [[LOAD_PTR:%[0-9]+]] = bitcast i32* %next.gep1
-; CHECK: load <2 x i32>, <2 x i32>* [[LOAD_PTR]]
-; CHECK: load i32, i32* %next.gep
-; CHECK: [[STORE_PTR:%[0-9]+]] = bitcast i32* %next.gep
-; CHECK: store <2 x i32>
-; CHECK-SAME: <2 x i32>* [[STORE_PTR]]
-; CHECK: load i32, i32* %next.gep1
-define void @correct_order(i32* noalias %ptr) {
-  %next.gep = getelementptr i32, i32* %ptr, i64 0
-  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
-  %next.gep2 = getelementptr i32, i32* %ptr, i64 2
-
-  %l1 = load i32, i32* %next.gep1, align 4
-  %l2 = load i32, i32* %next.gep, align 4
-  store i32 0, i32* %next.gep1, align 4
-  store i32 0, i32* %next.gep, align 4
-  %l3 = load i32, i32* %next.gep1, align 4
-  %l4 = load i32, i32* %next.gep2, align 4
-
-  ret void
-}
-
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg b/llvm/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg
deleted file mode 100644
index e71f3cc4c41..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-if not 'X86' in config.root.targets:
-    config.unsupported = True
-
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll
deleted file mode 100644
index ac5f3ea9f0f..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
-
-define <8 x double> @loadwidth_insert_extract(double* %ptr) {
-  %a = bitcast double* %ptr to <2 x double> *
-  %b = getelementptr <2 x double>, <2 x double>* %a, i32 1
-  %c = getelementptr <2 x double>, <2 x double>* %a, i32 2
-  %d = getelementptr <2 x double>, <2 x double>* %a, i32 3
-; CHECK-HSW: load <4 x double>
-; CHECK-HSW: load <4 x double>
-; CHECK-HSW-NOT: load
-; CHECK-KNL: load <8 x double>
-; CHECK-KNL-NOT: load
-  %la = load <2 x double>, <2 x double> *%a
-  %lb = load <2 x double>, <2 x double> *%b
-  %lc = load <2 x double>, <2 x double> *%c
-  %ld = load <2 x double>, <2 x double> *%d
-  ; Scalarize everything - Explicitly not a shufflevector to test this code
-  ; path in the LSV
-  %v1 = extractelement <2 x double> %la, i32 0
-  %v2 = extractelement <2 x double> %la, i32 1
-  %v3 = extractelement <2 x double> %lb, i32 0
-  %v4 = extractelement <2 x double> %lb, i32 1
-  %v5 = extractelement <2 x double> %lc, i32 0
-  %v6 = extractelement <2 x double> %lc, i32 1
-  %v7 = extractelement <2 x double> %ld, i32 0
-  %v8 = extractelement <2 x double> %ld, i32 1
-  ; Make a vector again
-  %i1 = insertelement <8 x double> undef, double %v1, i32 0
-  %i2 = insertelement <8 x double> %i1, double %v2, i32 1
-  %i3 = insertelement <8 x double> %i2, double %v3, i32 2
-  %i4 = insertelement <8 x double> %i3, double %v4, i32 3
-  %i5 = insertelement <8 x double> %i4, double %v5, i32 4
-  %i6 = insertelement <8 x double> %i5, double %v6, i32 5
-  %i7 = insertelement <8 x double> %i6, double %v7, i32 6
-  %i8 = insertelement <8 x double> %i7, double %v8, i32 7
-  ret <8 x double> %i8
-}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll
deleted file mode 100644
index a93e9aceb73..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S < %s | \
-; RUN: FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S < %s | \
-; RUN: FileCheck %s
-;
-; The GPU Load & Store Vectorizer may merge differently-typed accesses into a
-; single instruction. This test checks that we merge TBAA tags for such
-; accesses correctly.
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; struct S {
-;   float f;
-;   int i;
-; };
-%struct.S = type { float, i32 }
-
-; float foo(S *p) {
-;   p->f -= 1;
-;   p->i -= 1;
-;   return p->f;
-; }
-define float @foo(%struct.S* %p) {
-entry:
-; CHECK-LABEL: foo
-; CHECK: load <2 x i32>, {{.*}}, !tbaa [[TAG_char:!.*]]
-; CHECK: store <2 x i32> {{.*}}, !tbaa [[TAG_char]]
-  %f = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0
-  %0 = load float, float* %f, align 4, !tbaa !2
-  %sub = fadd float %0, -1.000000e+00
-  store float %sub, float* %f, align 4, !tbaa !2
-  %i = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1
-  %1 = load i32, i32* %i, align 4, !tbaa !8
-  %sub1 = add nsw i32 %1, -1
-  store i32 %sub1, i32* %i, align 4, !tbaa !8
-  ret float %sub
-}
-
-!2 = !{!3, !4, i64 0}
-!3 = !{!"_ZTS1S", !4, i64 0, !7, i64 4}
-!4 = !{!"float", !5, i64 0}
-!5 = !{!"omnipotent char", !6, i64 0}
-!6 = !{!"Simple C++ TBAA"}
-!7 = !{!"int", !5, i64 0}
-!8 = !{!3, !7, i64 4}
-
-; CHECK-DAG: [[TYPE_char:!.*]] = !{!"omnipotent char", {{.*}}, i64 0}
-; CHECK-DAG: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/non-byte-size.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/non-byte-size.ll
deleted file mode 100644
index 7a0073808a0..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/non-byte-size.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -load-store-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
-
-%rec = type { i32, i28 }
-
-; We currently do not optimize this scenario.
-; But we verify that we no longer crash when compiling this.
-define void @test1(%rec* %out, %rec* %in) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[IN1:%.*]] = getelementptr [[REC:%.*]], %rec* [[IN:%.*]], i16 0, i32 0
-; CHECK-NEXT:    [[IN2:%.*]] = getelementptr [[REC]], %rec* [[IN]], i16 0, i32 1
-; CHECK-NEXT:    [[VAL1:%.*]] = load i32, i32* [[IN1]], align 8
-; CHECK-NEXT:    [[VAL2:%.*]] = load i28, i28* [[IN2]]
-; CHECK-NEXT:    [[OUT1:%.*]] = getelementptr [[REC]], %rec* [[OUT:%.*]], i16 0, i32 0
-; CHECK-NEXT:    [[OUT2:%.*]] = getelementptr [[REC]], %rec* [[OUT]], i16 0, i32 1
-; CHECK-NEXT:    store i32 [[VAL1]], i32* [[OUT1]], align 8
-; CHECK-NEXT:    store i28 [[VAL2]], i28* [[OUT2]]
-; CHECK-NEXT:    ret void
-;
-  %in1 = getelementptr %rec, %rec* %in, i16 0, i32 0
-  %in2 = getelementptr %rec, %rec* %in, i16 0, i32 1
-  %val1 = load i32, i32* %in1, align 8
-  %val2 = load i28, i28* %in2
-  %out1 = getelementptr %rec, %rec* %out, i16 0, i32 0
-  %out2 = getelementptr %rec, %rec* %out, i16 0, i32 1
-  store i32 %val1, i32* %out1, align 8
-  store i28 %val2, i28* %out2
-  ret void
-}
-
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll
deleted file mode 100644
index 3cfe7454baf..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux -load-store-vectorizer -S -o - %s | FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
-
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-
-%struct.buffer_t = type { i32, i8* }
-
-; Check an i32 and i8* get vectorized, and that the two accesses
-; (load into buff.val and store to buff.p) preserve their order.
-; Vectorized loads should be inserted at the position of the first load,
-; and instructions which were between the first and last load should be
-; reordered preserving their relative order inasmuch as possible.
-
-; CHECK-LABEL: @preserve_order_32(
-; CHECK: load <2 x i32>
-; CHECK: %buff.val = load i8
-; CHECK: store i8 0
-define void @preserve_order_32(%struct.buffer_t* noalias %buff) #0 {
-entry:
-  %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 1
-  %buff.p = load i8*, i8** %tmp1
-  %buff.val = load i8, i8* %buff.p
-  store i8 0, i8* %buff.p, align 8
-  %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 0
-  %buff.int = load i32, i32* %tmp0, align 8
-  ret void
-}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll
deleted file mode 100644
index 3ae0d891dc5..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-
-%struct.buffer_t = type { i64, i8* }
-%struct.nested.buffer = type { %struct.buffer_t, %struct.buffer_t }
-
-; Check an i64 and i8* get vectorized, and that the two accesses
-; (load into buff.val and store to buff.p) preserve their order.
-; Vectorized loads should be inserted at the position of the first load,
-; and instructions which were between the first and last load should be
-; reordered preserving their relative order inasmuch as possible.
-
-; CHECK-LABEL: @preserve_order_64(
-; CHECK: load <2 x i64>
-; CHECK: %buff.val = load i8
-; CHECK: store i8 0
-define void @preserve_order_64(%struct.buffer_t* noalias %buff) #0 {
-entry:
-  %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
-  %buff.p = load i8*, i8** %tmp1
-  %buff.val = load i8, i8* %buff.p
-  store i8 0, i8* %buff.p, align 8
-  %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
-  %buff.int = load i64, i64* %tmp0, align 16
-  ret void
-}
-
-; Check reordering recurses correctly.
-
-; CHECK-LABEL: @transitive_reorder(
-; CHECK: load <2 x i64>
-; CHECK: %buff.val = load i8
-; CHECK: store i8 0
-define void @transitive_reorder(%struct.buffer_t* noalias %buff, %struct.nested.buffer* noalias %nest) #0 {
-entry:
-  %nest0_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
-  %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest0_0, i64 0, i32 1
-  %buff.p = load i8*, i8** %tmp1
-  %buff.val = load i8, i8* %buff.p
-  store i8 0, i8* %buff.p, align 8
-  %nest1_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
-  %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest1_0, i64 0, i32 0
-  %buff.int = load i64, i64* %tmp0, align 16
-  ret void
-}
-
-; Check for no vectorization over phi node
-
-; CHECK-LABEL: @no_vect_phi(
-; CHECK: load i8*
-; CHECK: load i8
-; CHECK: store i8 0
-; CHECK: load i64
-define void @no_vect_phi(i32* noalias %ptr, %struct.buffer_t* noalias %buff) {
-entry:
-  %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
-  %buff.p = load i8*, i8** %tmp1
-  %buff.val = load i8, i8* %buff.p
-  store i8 0, i8* %buff.p, align 8
-  br label %"for something"
-
-"for something":
-  %index = phi i64 [ 0, %entry ], [ %index.next, %"for something" ]
-
-  %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
-  %buff.int = load i64, i64* %tmp0, align 16
-
-  %index.next = add i64 %index, 8
-  %cmp_res = icmp eq i64 %index.next, 8
-  br i1 %cmp_res, label %ending, label %"for something"
-
-ending:
-  ret void
-}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
deleted file mode 100644
index 72b29912d81..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
+++ /dev/null
@@ -1,118 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-
-; Vectorized subsets of the load/store chains in the presence of
-; interleaved loads/stores
-
-; CHECK-LABEL: @interleave_2L_2S(
-; CHECK: load <2 x i32>
-; CHECK: load i32
-; CHECK: store <2 x i32>
-; CHECK: load i32
-define void @interleave_2L_2S(i32* noalias %ptr) {
-  %next.gep = getelementptr i32, i32* %ptr, i64 0
-  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
-  %next.gep2 = getelementptr i32, i32* %ptr, i64 2
-
-  %l1 = load i32, i32* %next.gep1, align 4
-  %l2 = load i32, i32* %next.gep, align 4
-  store i32 0, i32* %next.gep1, align 4
-  store i32 0, i32* %next.gep, align 4
-  %l3 = load i32, i32* %next.gep1, align 4
-  %l4 = load i32, i32* %next.gep2, align 4
-
-  ret void
-}
-
-; CHECK-LABEL: @interleave_3L_2S_1L(
-; CHECK: load <3 x i32>
-; CHECK: store <2 x i32>
-; CHECK: load i32
-
-define void @interleave_3L_2S_1L(i32* noalias %ptr) {
-  %next.gep = getelementptr i32, i32* %ptr, i64 0
-  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
-  %next.gep2 = getelementptr i32, i32* %ptr, i64 2
-
-  %l2 = load i32, i32* %next.gep, align 4
-  %l1 = load i32, i32* %next.gep1, align 4
-  store i32 0, i32* %next.gep1, align 4
-  store i32 0, i32* %next.gep, align 4
-  %l3 = load i32, i32* %next.gep1, align 4
-  %l4 = load i32, i32* %next.gep2, align 4
-
-  ret void
-}
-
-; CHECK-LABEL: @chain_suffix(
-; CHECK: load i32
-; CHECK: store <2 x i32>
-; CHECK: load <2 x i32>
-define void @chain_suffix(i32* noalias %ptr) {
-  %next.gep = getelementptr i32, i32* %ptr, i64 0
-  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
-  %next.gep2 = getelementptr i32, i32* %ptr, i64 2
-
-  %l2 = load i32, i32* %next.gep, align 4
-  store i32 0, i32* %next.gep1, align 4
-  store i32 0, i32* %next.gep, align 4
-  %l3 = load i32, i32* %next.gep1, align 4
-  %l4 = load i32, i32* %next.gep2, align 4
-
-  ret void
-}
-
-
-; CHECK-LABEL: @chain_prefix_suffix(
-; CHECK: load <2 x i32>
-; CHECK: store <2 x i32>
-; CHECK: load <3 x i32>
-define void @chain_prefix_suffix(i32* noalias %ptr) {
-  %next.gep = getelementptr i32, i32* %ptr, i64 0
-  %next.gep1 = getelementptr i32, i32* %ptr, i64 1
-  %next.gep2 = getelementptr i32, i32* %ptr, i64 2
-  %next.gep3 = getelementptr i32, i32* %ptr, i64 3
-
-  %l1 = load i32, i32* %next.gep, align 4
-  %l2 = load i32, i32* %next.gep1, align 4
-  store i32 0, i32* %next.gep1, align 4
-  store i32 0, i32* %next.gep2, align 4
-  %l3 = load i32, i32* %next.gep1, align 4
-  %l4 = load i32, i32* %next.gep2, align 4
-  %l5 = load i32, i32* %next.gep3, align 4
-
-  ret void
-}
-
-; FIXME: If the chain is too long and TLI says misaligned is not fast,
-; then LSV fails to vectorize anything in that chain.
-; To reproduce below, add a tmp5 (ptr+4) and load tmp5 into l6 and l7.
-
-; CHECK-LABEL: @interleave_get_longest
-; CHECK: load <3 x i32>
-; CHECK: load i32
-; CHECK: store <2 x i32> zeroinitializer
-; CHECK: load i32
-; CHECK: load i32
-; CHECK: load i32
-
-define void @interleave_get_longest(i32* noalias %ptr) {
-  %tmp1 = getelementptr i32, i32* %ptr, i64 0
-  %tmp2 = getelementptr i32, i32* %ptr, i64 1
-  %tmp3 = getelementptr i32, i32* %ptr, i64 2
-  %tmp4 = getelementptr i32, i32* %ptr, i64 3
-
-  %l1 = load i32, i32* %tmp2, align 4
-  %l2 = load i32, i32* %tmp1, align 4
-  store i32 0, i32* %tmp2, align 4
-  store i32 0, i32* %tmp1, align 4
-  %l3 = load i32, i32* %tmp2, align 4
-  %l4 = load i32, i32* %tmp3, align 4
-  %l5 = load i32, i32* %tmp4, align 4
-  %l6 = load i32, i32* %tmp4, align 4
-  %l7 = load i32, i32* %tmp4, align 4
-
-  ret void
-}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/vector-scalar.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/vector-scalar.ll
deleted file mode 100644
index 00971f35038..00000000000
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/vector-scalar.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu haswell -S -o - %s | FileCheck %s
-
-; Check that the LoadStoreVectorizer does not crash due to not differentiating <1 x T> and T.
-
-; CHECK-LABEL: @vector_scalar(
-; CHECK: store double
-; CHECK: store <1 x double>
-define void @vector_scalar(double* %ptr, double %a, <1 x double> %b) {
-  %1 = bitcast double* %ptr to <1 x double>*
-  %2 = getelementptr <1 x double>, <1 x double>* %1, i32 1
-  store double %a, double* %ptr, align 8
-  store <1 x double> %b, <1 x double>* %2, align 8
-  ret void
-}
```
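As background on how the deleted tests were exercised: lit runs each `; RUN:` line as a shell command, substituting `%s` with the path of the test file, and FileCheck then matches the `; CHECK:` directives in that same file against opt's output. A minimal sketch of what the first RUN line of correct-order.ll expands to (the in-tree path is assumed; this expansion is illustrative and not part of the commit):

```sh
# Illustrative expansion of a lit RUN line (assumed in-tree path).
# %s -> the test file itself; opt prints the transformed IR to stdout,
# and FileCheck verifies it against the ; CHECK: lines in the same file.
TEST=llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - "$TEST" \
  | FileCheck "$TEST"
```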

