Revert r190764: PPC: Don't restrict lvsl generation to after type legalization

This is causing test-suite failures. Original commit message: The PPC backend uses a target-specific DAG combine to turn unaligned Altivec loads into a permutation-based sequence when possible. Unfortunately, the target-specific DAG combine is not always called on all loads of interest (sometimes the routines in DAGCombine call CombineTo such that the new node and users are not added to the worklist); allowing the combine to trigger early (before type legalization) mitigates this problem. Because the autovectorizers only create legal vector types, I don't expect a lot of cases where this optimization is enabled by type legalization in practice. llvm-svn: 190765
author: Hal Finkel <hfinkel@anl.gov> 2013-09-15 15:41:11 +0000
committer: Hal Finkel <hfinkel@anl.gov> 2013-09-15 15:41:11 +0000
commit: 31025a63250998cc1f11aa0d54a3cf0ad7783120 (patch)
tree: 27af2eb184887cc349b9787e708e66d5dae4439a
parent: 2945d4e91658caf48aa67ebbfe7bf47b8697f6ae (diff)
download: bcm5719-llvm-31025a63250998cc1f11aa0d54a3cf0ad7783120.tar.gz
bcm5719-llvm-31025a63250998cc1f11aa0d54a3cf0ad7783120.zip
2 files changed, 1 insertions, 154 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ddad9f82a27..ae046f0b7af 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7177,6 +7177,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
     if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
         TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+        DCI.getDAGCombineLevel() == AfterLegalizeTypes &&
         LD->getAlignment() < ABIAlignment) {
       // This is a type-legal unaligned Altivec load.
       SDValue Chain = LD->getChain();
diff --git a/llvm/test/CodeGen/PowerPC/unal-altivec2.ll b/llvm/test/CodeGen/PowerPC/unal-altivec2.ll
deleted file mode 100644
index 0ad4b1072c2..00000000000
--- a/llvm/test/CodeGen/PowerPC/unal-altivec2.ll
+++ /dev/null
@@ -1,154 +0,0 @@
-; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
-
-; Function Attrs: nounwind
-define void @foo(float* noalias nocapture %x, float* noalias nocapture readonly %y) #0 {
-entry:
-  br label %vector.body
-
-vector.body:                                      ; preds = %vector.body, %entry
-; CHECK-LABEL: @foo
-; CHECK: lvsl
-; CHECK: blr
-  %index = phi i64 [ 0, %entry ], [ %index.next.15, %vector.body ]
-  %0 = getelementptr inbounds float* %y, i64 %index
-  %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
-  %2 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load)
-  %3 = getelementptr inbounds float* %x, i64 %index
-  %4 = bitcast float* %3 to <4 x float>*
-  store <4 x float> %2, <4 x float>* %4, align 4
-  %index.next = add i64 %index, 4
-  %5 = getelementptr inbounds float* %y, i64 %index.next
-  %6 = bitcast float* %5 to <4 x float>*
-  %wide.load.1 = load <4 x float>* %6, align 4
-  %7 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.1)
-  %8 = getelementptr inbounds float* %x, i64 %index.next
-  %9 = bitcast float* %8 to <4 x float>*
-  store <4 x float> %7, <4 x float>* %9, align 4
-  %index.next.1 = add i64 %index.next, 4
-  %10 = getelementptr inbounds float* %y, i64 %index.next.1
-  %11 = bitcast float* %10 to <4 x float>*
-  %wide.load.2 = load <4 x float>* %11, align 4
-  %12 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.2)
-  %13 = getelementptr inbounds float* %x, i64 %index.next.1
-  %14 = bitcast float* %13 to <4 x float>*
-  store <4 x float> %12, <4 x float>* %14, align 4
-  %index.next.2 = add i64 %index.next.1, 4
-  %15 = getelementptr inbounds float* %y, i64 %index.next.2
-  %16 = bitcast float* %15 to <4 x float>*
-  %wide.load.3 = load <4 x float>* %16, align 4
-  %17 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.3)
-  %18 = getelementptr inbounds float* %x, i64 %index.next.2
-  %19 = bitcast float* %18 to <4 x float>*
-  store <4 x float> %17, <4 x float>* %19, align 4
-  %index.next.3 = add i64 %index.next.2, 4
-  %20 = getelementptr inbounds float* %y, i64 %index.next.3
-  %21 = bitcast float* %20 to <4 x float>*
-  %wide.load.4 = load <4 x float>* %21, align 4
-  %22 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.4)
-  %23 = getelementptr inbounds float* %x, i64 %index.next.3
-  %24 = bitcast float* %23 to <4 x float>*
-  store <4 x float> %22, <4 x float>* %24, align 4
-  %index.next.4 = add i64 %index.next.3, 4
-  %25 = getelementptr inbounds float* %y, i64 %index.next.4
-  %26 = bitcast float* %25 to <4 x float>*
-  %wide.load.5 = load <4 x float>* %26, align 4
-  %27 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.5)
-  %28 = getelementptr inbounds float* %x, i64 %index.next.4
-  %29 = bitcast float* %28 to <4 x float>*
-  store <4 x float> %27, <4 x float>* %29, align 4
-  %index.next.5 = add i64 %index.next.4, 4
-  %30 = getelementptr inbounds float* %y, i64 %index.next.5
-  %31 = bitcast float* %30 to <4 x float>*
-  %wide.load.6 = load <4 x float>* %31, align 4
-  %32 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.6)
-  %33 = getelementptr inbounds float* %x, i64 %index.next.5
-  %34 = bitcast float* %33 to <4 x float>*
-  store <4 x float> %32, <4 x float>* %34, align 4
-  %index.next.6 = add i64 %index.next.5, 4
-  %35 = getelementptr inbounds float* %y, i64 %index.next.6
-  %36 = bitcast float* %35 to <4 x float>*
-  %wide.load.7 = load <4 x float>* %36, align 4
-  %37 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.7)
-  %38 = getelementptr inbounds float* %x, i64 %index.next.6
-  %39 = bitcast float* %38 to <4 x float>*
-  store <4 x float> %37, <4 x float>* %39, align 4
-  %index.next.7 = add i64 %index.next.6, 4
-  %40 = getelementptr inbounds float* %y, i64 %index.next.7
-  %41 = bitcast float* %40 to <4 x float>*
-  %wide.load.8 = load <4 x float>* %41, align 4
-  %42 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.8)
-  %43 = getelementptr inbounds float* %x, i64 %index.next.7
-  %44 = bitcast float* %43 to <4 x float>*
-  store <4 x float> %42, <4 x float>* %44, align 4
-  %index.next.8 = add i64 %index.next.7, 4
-  %45 = getelementptr inbounds float* %y, i64 %index.next.8
-  %46 = bitcast float* %45 to <4 x float>*
-  %wide.load.9 = load <4 x float>* %46, align 4
-  %47 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.9)
-  %48 = getelementptr inbounds float* %x, i64 %index.next.8
-  %49 = bitcast float* %48 to <4 x float>*
-  store <4 x float> %47, <4 x float>* %49, align 4
-  %index.next.9 = add i64 %index.next.8, 4
-  %50 = getelementptr inbounds float* %y, i64 %index.next.9
-  %51 = bitcast float* %50 to <4 x float>*
-  %wide.load.10 = load <4 x float>* %51, align 4
-  %52 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.10)
-  %53 = getelementptr inbounds float* %x, i64 %index.next.9
-  %54 = bitcast float* %53 to <4 x float>*
-  store <4 x float> %52, <4 x float>* %54, align 4
-  %index.next.10 = add i64 %index.next.9, 4
-  %55 = getelementptr inbounds float* %y, i64 %index.next.10
-  %56 = bitcast float* %55 to <4 x float>*
-  %wide.load.11 = load <4 x float>* %56, align 4
-  %57 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.11)
-  %58 = getelementptr inbounds float* %x, i64 %index.next.10
-  %59 = bitcast float* %58 to <4 x float>*
-  store <4 x float> %57, <4 x float>* %59, align 4
-  %index.next.11 = add i64 %index.next.10, 4
-  %60 = getelementptr inbounds float* %y, i64 %index.next.11
-  %61 = bitcast float* %60 to <4 x float>*
-  %wide.load.12 = load <4 x float>* %61, align 4
-  %62 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.12)
-  %63 = getelementptr inbounds float* %x, i64 %index.next.11
-  %64 = bitcast float* %63 to <4 x float>*
-  store <4 x float> %62, <4 x float>* %64, align 4
-  %index.next.12 = add i64 %index.next.11, 4
-  %65 = getelementptr inbounds float* %y, i64 %index.next.12
-  %66 = bitcast float* %65 to <4 x float>*
-  %wide.load.13 = load <4 x float>* %66, align 4
-  %67 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.13)
-  %68 = getelementptr inbounds float* %x, i64 %index.next.12
-  %69 = bitcast float* %68 to <4 x float>*
-  store <4 x float> %67, <4 x float>* %69, align 4
-  %index.next.13 = add i64 %index.next.12, 4
-  %70 = getelementptr inbounds float* %y, i64 %index.next.13
-  %71 = bitcast float* %70 to <4 x float>*
-  %wide.load.14 = load <4 x float>* %71, align 4
-  %72 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.14)
-  %73 = getelementptr inbounds float* %x, i64 %index.next.13
-  %74 = bitcast float* %73 to <4 x float>*
-  store <4 x float> %72, <4 x float>* %74, align 4
-  %index.next.14 = add i64 %index.next.13, 4
-  %75 = getelementptr inbounds float* %y, i64 %index.next.14
-  %76 = bitcast float* %75 to <4 x float>*
-  %wide.load.15 = load <4 x float>* %76, align 4
-  %77 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.15)
-  %78 = getelementptr inbounds float* %x, i64 %index.next.14
-  %79 = bitcast float* %78 to <4 x float>*
-  store <4 x float> %77, <4 x float>* %79, align 4
-  %index.next.15 = add i64 %index.next.14, 4
-  %80 = icmp eq i64 %index.next.15, 2048
-  br i1 %80, label %for.end, label %vector.body
-
-for.end:                                          ; preds = %vector.body
-  ret void
-}
-
-; Function Attrs: nounwind readonly
-declare <4 x float> @llvm_cos_v4f32(<4 x float>) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
author	Hal Finkel <hfinkel@anl.gov>	2013-09-15 15:41:11 +0000
committer	Hal Finkel <hfinkel@anl.gov>	2013-09-15 15:41:11 +0000
commit	31025a63250998cc1f11aa0d54a3cf0ad7783120 (patch)
tree	27af2eb184887cc349b9787e708e66d5dae4439a
parent	2945d4e91658caf48aa67ebbfe7bf47b8697f6ae (diff)
download	bcm5719-llvm-31025a63250998cc1f11aa0d54a3cf0ad7783120.tar.gz bcm5719-llvm-31025a63250998cc1f11aa0d54a3cf0ad7783120.zip