path: root/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
author    Ivan Krasin <krasin@chromium.org>  2017-03-24 20:49:43 +0000
committer Ivan Krasin <krasin@chromium.org>  2017-03-24 20:49:43 +0000
commit    c2124e185c10b2d41fcc7eda8a492fa791135a64 (patch)
tree      dda612b1ada6ba488d93893bbd62301a8aab9f03 /llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
parent    167af85ec7f7fce3d42b5f4bf4bb797d1cba0e0a (diff)
Revert r298620: [LV] Vectorize GEPs
Reason: breaks linking Chromium with LLD + ThinLTO (a pass crashes).
LLVM bug: https://bugs.llvm.org//show_bug.cgi?id=32413

Original change description:

[LV] Vectorize GEPs

This patch adds support for vectorizing GEPs. Previously, we only generated vector GEPs on-demand when creating gather or scatter operations. All GEPs from the original loop were scalarized by default, and if a pointer was to be stored to memory, we would have to build up the pointer vector with insertelement instructions.

With this patch, we will vectorize all GEPs that haven't already been marked for scalarization. The patch refines collectLoopScalars to more exactly identify the scalar GEPs, so the function now more closely resembles collectLoopUniforms. The patch also moves vector GEP creation out of vectorizeMemoryInstruction and into the main vectorization loop; the vector GEPs needed for gather and scatter operations will have already been generated before vectorizing the memory accesses.

Original Differential Revision: https://reviews.llvm.org/D30710

llvm-svn: 298735
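As a rough illustration of the reverted change (a hand-written sketch, not code from the patch; %base, the %idxN values, and %vec.ind are made-up names, and VF=4 is used for brevity), storing a vector of pointers previously required one scalar GEP per lane plus an insertelement chain:

  ; scalarized form: one GEP per lane, pointer vector built up by hand
  %g0 = getelementptr inbounds float, float* %base, i64 %idx0
  %g1 = getelementptr inbounds float, float* %base, i64 %idx1
  %g2 = getelementptr inbounds float, float* %base, i64 %idx2
  %g3 = getelementptr inbounds float, float* %base, i64 %idx3
  %p0 = insertelement <4 x float*> undef, float* %g0, i32 0
  %p1 = insertelement <4 x float*> %p0, float* %g1, i32 1
  %p2 = insertelement <4 x float*> %p1, float* %g2, i32 2
  %p3 = insertelement <4 x float*> %p2, float* %g3, i32 3

whereas with the patch a single GEP over a vector of indices yields the pointer vector directly:

  ; vectorized form: one vector GEP produces the <4 x float*> result
  %vg = getelementptr inbounds float, float* %base, <4 x i64> %vec.ind

(Typed-pointer IR, as used in LLVM of this era.)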
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll')
 llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll | 44 +++++++++++++++++---------------------------
 1 file changed, 17 insertions(+), 27 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index 82f2e064a58..820335276dc 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -13,33 +13,23 @@ target triple = "x86_64-unknown-linux-gnu"
; scatter operation. %tmp3 (and the induction variable) should not be marked
; uniform-after-vectorization.
;
-; CHECK: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %i
-; CHECK-NOT: LV: Found uniform instruction: %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
-; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
-; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5
-; CHECK: vector.ph:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x float> undef, float %x, i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x float> [[BROADCAST_SPLATINSERT]], <16 x float> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT: br label %vector.body
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 5, i64 10, i64 15, i64 20, i64 25, i64 30, i64 35, i64 40, i64 45, i64 50, i64 55, i64 60, i64 65, i64 70, i64 75>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <80 x float>*
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <80 x float>, <80 x float>* [[TMP1]], align 4
-; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <80 x float> [[WIDE_VEC]], <80 x float> undef, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 35, i32 40, i32 45, i32 50, i32 55, i32 60, i32 65, i32 70, i32 75>
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <16 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds %data, %data* %d, i64 0, i32 0, <16 x i64> [[VEC_IND]]
-; CHECK-NEXT: [[BC:%.*]] = bitcast <16 x float*> [[TMP3]] to <16 x <80 x float>*>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x <80 x float>*> [[BC]], i32 0
-; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <80 x float>, <80 x float>* [[TMP4]], align 4
-; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> undef, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 35, i32 40, i32 45, i32 50, i32 55, i32 60, i32 65, i32 70, i32 75>
-; CHECK-NEXT: [[TMP5:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP2]]
-; CHECK-NEXT: call void @llvm.masked.scatter.v16f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80>
-; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
+; CHECK: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %i
+; CHECK-NOT: LV: Found uniform instruction: %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
+; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5
+; CHECK: vector.body:
+; CHECK: %index = phi i64
+; CHECK: %vec.ind = phi <16 x i64>
+; CHECK: %[[T0:.+]] = mul i64 %index, 5
+; CHECK: %[[T1:.+]] = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %[[T0]]
+; CHECK: %[[T2:.+]] = bitcast float* %[[T1]] to <80 x float>*
+; CHECK: load <80 x float>, <80 x float>* %[[T2]], align 4
+; CHECK: %[[T3:.+]] = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %[[T0]]
+; CHECK: %[[T4:.+]] = bitcast float* %[[T3]] to <80 x float>*
+; CHECK: load <80 x float>, <80 x float>* %[[T4]], align 4
+; CHECK: %VectorGep = getelementptr inbounds %data, %data* %d, i64 0, i32 0, <16 x i64> %vec.ind
+; CHECK: call void @llvm.masked.scatter.v16f32({{.*}}, <16 x float*> %VectorGep, {{.*}})
+; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
%data = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float] }
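For context, the loop being checked is, judging from the comment and CHECK lines above, roughly a stride-5 read-modify-write over the %data struct. The following is a reconstruction for illustration only; the function name, signature order, and loop-exit condition are guesses, not the test's actual body:

  define void @foo(float %x, %data* %d) {          ; hypothetical name/signature
  entry:
    br label %for.body

  for.body:
    %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
    ; uniform-after-vectorization: lowered to one consecutive wide load
    %tmp0 = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %i
    %tmp1 = load float, float* %tmp0, align 4
    %tmp2 = fmul float %x, %tmp1
    ; NOT uniform: every lane's pointer feeds the masked scatter
    %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
    %tmp4 = load float, float* %tmp3, align 4
    %tmp5 = fadd float %tmp4, %tmp2
    store float %tmp5, float* %tmp3, align 4
    %i.next = add nuw nsw i64 %i, 5
    %cond = icmp slt i64 %i.next, 32000            ; bound assumed from [32000 x float]
    br i1 %cond, label %for.body, label %for.end

  for.end:
    ret void
  }

Because the store through %tmp3 becomes a masked scatter, every lane needs its own address in a <16 x float*> vector, so marking %tmp3 (or the induction variable that feeds it) uniform-after-vectorization would be incorrect; that is exactly what the CHECK-NOT lines guard against.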