author    Matthew Simpson <mssimpso@codeaurora.org>    2017-03-23 16:29:58 +0000
committer Matthew Simpson <mssimpso@codeaurora.org>    2017-03-23 16:29:58 +0000
commit    4e7b71bc86e276bb87a4af7b457367f15eba00f2 (patch)
tree      b5bbb82388b14923560733a5cf089b241439e0ec /llvm/test/Transforms/LoopVectorize
parent    eaada5c91e986ec595dc9661c5f9232d63f988c8 (diff)
download  bcm5719-llvm-4e7b71bc86e276bb87a4af7b457367f15eba00f2.tar.gz
          bcm5719-llvm-4e7b71bc86e276bb87a4af7b457367f15eba00f2.zip
[LV] Vectorize GEPs
This patch adds support for vectorizing GEPs. Previously, we only generated
vector GEPs on-demand when creating gather or scatter operations. All GEPs from
the original loop were scalarized by default, and if a pointer was to be stored
to memory, we would have to build up the pointer vector with insertelement
instructions.
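
For illustration, a minimal sketch of that scalarized form (hypothetical names and values, VF = 4, in the typed-pointer IR of this vintage):

  ; Old behavior: one scalar GEP per lane, assembled into a pointer
  ; vector with insertelement before it can be stored.
  %g0 = getelementptr inbounds i32, i32* %b, i64 %i0
  %g1 = getelementptr inbounds i32, i32* %b, i64 %i1
  %g2 = getelementptr inbounds i32, i32* %b, i64 %i2
  %g3 = getelementptr inbounds i32, i32* %b, i64 %i3
  %p0 = insertelement <4 x i32*> undef, i32* %g0, i32 0
  %p1 = insertelement <4 x i32*> %p0, i32* %g1, i32 1
  %p2 = insertelement <4 x i32*> %p1, i32* %g2, i32 2
  %p3 = insertelement <4 x i32*> %p2, i32* %g3, i32 3
  store <4 x i32*> %p3, <4 x i32*>* %dst, align 8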
With this patch, we will vectorize all GEPs that haven't already been marked
for scalarization.
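
With the patch, the same pointer vector comes from a single GEP whose index operand is the vector induction variable, the exact pattern the new vector-geps.ll test below checks for (again a sketch with hypothetical names):

  ; New behavior: one vector GEP yields the whole pointer vector.
  %vgep = getelementptr inbounds i32, i32* %b, <4 x i64> %vec.ind
  store <4 x i32*> %vgep, <4 x i32*>* %dst, align 8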
The patch refines collectLoopScalars to identify the scalar GEPs more precisely;
the function now more closely resembles collectLoopUniforms. The patch also
moves vector GEP creation out of vectorizeMemoryInstruction and into the main
vectorization loop, so the vector GEPs needed for gather and scatter operations
have already been generated by the time the memory accesses are vectorized.
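
As a sketch of the distinction (hypothetical names; compare the consecutive-ptr-uniforms.ll checks below): a GEP whose users are all widened consecutive accesses needs only the lane-zero address and stays scalar, while a GEP feeding a gather or scatter becomes a vector GEP consumed directly by the masked intrinsic:

  ; Stays scalar: a consecutive access needs just one address.
  %addr = getelementptr inbounds float, float* %p, i64 %offset.idx
  %vptr = bitcast float* %addr to <4 x float>*
  %wide = load <4 x float>, <4 x float>* %vptr, align 4

  ; Vectorized: the scatter consumes the full pointer vector.
  %vgep = getelementptr inbounds float, float* %p, <4 x i64> %vec.ind
  call void @llvm.masked.scatter.v4f32(<4 x float> %val, <4 x float*> %vgep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)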
Differential Revision: https://reviews.llvm.org/D30710
llvm-svn: 298620
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
  llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
  llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
  llvm/test/Transforms/LoopVectorize/vector-geps.ll (new)
3 files changed, 104 insertions, 107 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index 820335276dc..82f2e064a58 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -13,23 +13,33 @@ target triple = "x86_64-unknown-linux-gnu"
 ; scatter operation. %tmp3 (and the induction variable) should not be marked
 ; uniform-after-vectorization.
 ;
-; CHECK: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %i
-; CHECK-NOT: LV: Found uniform instruction: %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
-; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
-; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5
-; CHECK: vector.body:
-; CHECK: %index = phi i64
-; CHECK: %vec.ind = phi <16 x i64>
-; CHECK: %[[T0:.+]] = mul i64 %index, 5
-; CHECK: %[[T1:.+]] = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %[[T0]]
-; CHECK: %[[T2:.+]] = bitcast float* %[[T1]] to <80 x float>*
-; CHECK: load <80 x float>, <80 x float>* %[[T2]], align 4
-; CHECK: %[[T3:.+]] = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %[[T0]]
-; CHECK: %[[T4:.+]] = bitcast float* %[[T3]] to <80 x float>*
-; CHECK: load <80 x float>, <80 x float>* %[[T4]], align 4
-; CHECK: %VectorGep = getelementptr inbounds %data, %data* %d, i64 0, i32 0, <16 x i64> %vec.ind
-; CHECK: call void @llvm.masked.scatter.v16f32({{.*}}, <16 x float*> %VectorGep, {{.*}})
-; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
+; CHECK: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %i
+; CHECK-NOT: LV: Found uniform instruction: %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
+; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x float> undef, float %x, i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x float> [[BROADCAST_SPLATINSERT]], <16 x float> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: br label %vector.body
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 5, i64 10, i64 15, i64 20, i64 25, i64 30, i64 35, i64 40, i64 45, i64 50, i64 55, i64 60, i64 65, i64 70, i64 75>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <80 x float>*
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <80 x float>, <80 x float>* [[TMP1]], align 4
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <80 x float> [[WIDE_VEC]], <80 x float> undef, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 35, i32 40, i32 45, i32 50, i32 55, i32 60, i32 65, i32 70, i32 75>
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <16 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds %data, %data* %d, i64 0, i32 0, <16 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[BC:%.*]] = bitcast <16 x float*> [[TMP3]] to <16 x <80 x float>*>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x <80 x float>*> [[BC]], i32 0
+; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <80 x float>, <80 x float>* [[TMP4]], align 4
+; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> undef, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 35, i32 40, i32 45, i32 50, i32 55, i32 60, i32 65, i32 70, i32 75>
+; CHECK-NEXT: [[TMP5:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP2]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v16f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80>
+; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
 
 %data = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float] }
 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index ec67e632efb..bda4b2454ee 100755
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -16,97 +16,23 @@ target triple = "x86_64-apple-macosx10.11.0"
 define void @_Z3fn1v() #0 {
 ; CHECK-LABEL: @_Z3fn1v(
 ; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX:%.*]].next, %vector.body ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [
-; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [
-; CHECK-NEXT: [[SHL:%.*]] = shl i64 %index, 1
-; CHECK-NEXT: %offset.idx = add i64 [[SHL]], 8
-; CHECK-NEXT: [[IND00:%.*]] = add i64 %offset.idx, 0
-; CHECK-NEXT: [[IND02:%.*]] = add i64 %offset.idx, 2
-; CHECK-NEXT: [[IND04:%.*]] = add i64 %offset.idx, 4
-; CHECK-NEXT: [[IND06:%.*]] = add i64 %offset.idx, 6
-; CHECK-NEXT: [[IND08:%.*]] = add i64 %offset.idx, 8
-; CHECK-NEXT: [[IND10:%.*]] = add i64 %offset.idx, 10
-; CHECK-NEXT: [[IND12:%.*]] = add i64 %offset.idx, 12
-; CHECK-NEXT: [[IND14:%.*]] = add i64 %offset.idx, 14
-; CHECK-NEXT: [[IND16:%.*]] = add i64 %offset.idx, 16
-; CHECK-NEXT: [[IND18:%.*]] = add i64 %offset.idx, 18
-; CHECK-NEXT: [[IND20:%.*]] = add i64 %offset.idx, 20
-; CHECK-NEXT: [[IND22:%.*]] = add i64 %offset.idx, 22
-; CHECK-NEXT: [[IND24:%.*]] = add i64 %offset.idx, 24
-; CHECK-NEXT: [[IND26:%.*]] = add i64 %offset.idx, 26
-; CHECK-NEXT: [[IND28:%.*]] = add i64 %offset.idx, 28
-; CHECK-NEXT: [[IND30:%.*]] = add i64 %offset.idx, 30
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [ <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30>, %vector.ph ], [ [[VEC_IND_NEXT4:%.*]], %vector.body ]
 ; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i64> <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>, [[VEC_IND]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND00]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND02]]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND04]]
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND06]]
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND08]]
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND10]]
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND12]]
-; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND14]]
-; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND16]]
-; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND18]]
-; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND20]]
-; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND22]]
-; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND24]]
-; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND26]]
-; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND28]]
-; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND30]]
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2
-; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3
-; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4
-; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5
-; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6
-; CHECK-NEXT: [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7
-; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8
-; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9
-; CHECK-NEXT: [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10
-; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11
-; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12
-; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13
-; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14
-; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15
-; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
-; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i64> [[TMP59]], i32 0
-; CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP12]], i64 [[TMP61]], i64 0
-; CHECK-NEXT: [[TMP65:%.*]] = extractelement <16 x i64> [[TMP59]], i32 1
-; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP15]], i64 [[TMP65]], i64 0
-; CHECK-NEXT: [[TMP69:%.*]] = extractelement <16 x i64> [[TMP59]], i32 2
-; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP18]], i64 [[TMP69]], i64 0
-; CHECK-NEXT: [[TMP73:%.*]] = extractelement <16 x i64> [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP21]], i64 [[TMP73]], i64 0
-; CHECK-NEXT: [[TMP77:%.*]] = extractelement <16 x i64> [[TMP59]], i32 4
-; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP24]], i64 [[TMP77]], i64 0
-; CHECK-NEXT: [[TMP81:%.*]] = extractelement <16 x i64> [[TMP59]], i32 5
-; CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP27]], i64 [[TMP81]], i64 0
-; CHECK-NEXT: [[TMP85:%.*]] = extractelement <16 x i64> [[TMP59]], i32 6
-; CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP30]], i64 [[TMP85]], i64 0
-; CHECK-NEXT: [[TMP89:%.*]] = extractelement <16 x i64> [[TMP59]], i32 7
-; CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP33]], i64 [[TMP89]], i64 0
-; CHECK-NEXT: [[TMP93:%.*]] = extractelement <16 x i64> [[TMP59]], i32 8
-; CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP36]], i64 [[TMP93]], i64 0
-; CHECK-NEXT: [[TMP97:%.*]] = extractelement <16 x i64> [[TMP59]], i32 9
-; CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP39]], i64 [[TMP97]], i64 0
-; CHECK-NEXT: [[TMP101:%.*]] = extractelement <16 x i64> [[TMP59]], i32 10
-; CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP42]], i64 [[TMP101]], i64 0
-; CHECK-NEXT: [[TMP105:%.*]] = extractelement <16 x i64> [[TMP59]], i32 11
-; CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP45]], i64 [[TMP105]], i64 0
-; CHECK-NEXT: [[TMP109:%.*]] = extractelement <16 x i64> [[TMP59]], i32 12
-; CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP48]], i64 [[TMP109]], i64 0
-; CHECK-NEXT: [[TMP113:%.*]] = extractelement <16 x i64> [[TMP59]], i32 13
-; CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP51]], i64 [[TMP113]], i64 0
-; CHECK-NEXT: [[TMP117:%.*]] = extractelement <16 x i64> [[TMP59]], i32 14
-; CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP54]], i64 [[TMP117]], i64 0
-; CHECK-NEXT: [[TMP121:%.*]] = extractelement <16 x i64> [[TMP59]], i32 15
-; CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP57]], i64 [[TMP121]], i64 0
-; CHECK-NEXT: [[VECTORGEP:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP58]], <16 x i64> [[TMP59]], i64 0
-; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[VECTORGEP]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK: [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-; CHECK: [[STEP_ADD4:%.*]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, <16 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP12]], i64 0
+; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i64> [[VEC_IND3]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT: [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP15]], i64 0
+; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
+;
 entry:
   %0 = load i32, i32* @c, align 4
   %cmp34 = icmp sgt i32 %0, 8
diff --git a/llvm/test/Transforms/LoopVectorize/vector-geps.ll b/llvm/test/Transforms/LoopVectorize/vector-geps.ll
new file mode 100644
index 00000000000..bd79499d5d3
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vector-geps.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: @vector_gep_stored(
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* %b, <4 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32** [[TMP2]] to <4 x i32*>*
+; CHECK-NEXT: store <4 x i32*> [[TMP1]], <4 x i32*>* [[TMP3]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
+;
+define void @vector_gep_stored(i32** %a, i32 *%b, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+  %tmp0 = getelementptr inbounds i32, i32* %b, i64 %i
+  %tmp1 = getelementptr inbounds i32*, i32** %a, i64 %i
+  store i32* %tmp0, i32** %tmp1, align 8
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @uniform_vector_gep_stored(
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* %b, i64 1
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1]], i32 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32*> [[DOTSPLATINSERT]], <4 x i32*> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32** [[TMP2]] to <4 x i32*>*
+; CHECK-NEXT: store <4 x i32*> [[DOTSPLAT]], <4 x i32*>* [[TMP3]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
+;
+define void @uniform_vector_gep_stored(i32** %a, i32 *%b, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+  %tmp0 = getelementptr inbounds i32, i32* %b, i64 1
+  %tmp1 = getelementptr inbounds i32*, i32** %a, i64 %i
+  store i32* %tmp0, i32** %tmp1, align 8
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}