path: root/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU
author    Farhana Aleen <farhana.aleen@gmail.com>  2018-07-19 16:50:27 +0000
committer Farhana Aleen <farhana.aleen@gmail.com>  2018-07-19 16:50:27 +0000
commit    8c7a30baea219e8143b13e3e384ff713d8bb7c76 (patch)
tree      6f6740fbacbb2020321e1c3833f2b13403986b17 /llvm/test/Transforms/LoadStoreVectorizer/AMDGPU
parent    d1cf276621a7382a0f8e1d6f70d317e3944ffbeb (diff)
[LoadStoreVectorizer] Use getMinusSCEV() to compute the distance between two pointers.
Summary:
Currently, isConsecutiveAccess() detects two pointers (PtrA and PtrB) as consecutive by comparing PtrB with BaseDelta+PtrA. This works when both pointers are factorized or when neither is factorized, but it fails when one pointer is factorized and the other is not. Here is an example:

  PtrA = 4 * (A + B)
  PtrB = 4 + 4A + 4B

This patch uses getMinusSCEV() to compute the distance between the two pointers instead. getMinusSCEV() combines the expressions and computes the simplified distance.

Author: FarhanaAleen

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D49516

llvm-svn: 337471
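For reference, here is a minimal sketch (not the actual patch; the helper name areConsecutiveBySCEV and its signature are hypothetical) of how a distance check built on ScalarEvolution::getMinusSCEV() can look:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;

// Hypothetical helper: returns true if PtrB lies exactly Size bytes past PtrA.
// getMinusSCEV() folds and simplifies the subtraction, so subtracting a
// factorized expression such as 4*(A+B) from the expanded form 4 + 4A + 4B
// still reduces to the constant 4.
static bool areConsecutiveBySCEV(ScalarEvolution &SE, Value *PtrA, Value *PtrB,
                                 const APInt &Size) {
  const SCEV *SA = SE.getSCEV(PtrA);
  const SCEV *SB = SE.getSCEV(PtrB);
  const SCEV *Dist = SE.getMinusSCEV(SB, SA);
  // The accesses are consecutive only if the simplified distance is a
  // compile-time constant equal to the expected byte offset.
  if (const auto *C = dyn_cast<SCEVConstant>(Dist))
    return C->getAPInt() == Size;
  return false;
}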
Diffstat (limited to 'llvm/test/Transforms/LoadStoreVectorizer/AMDGPU')
-rw-r--r--  llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/complex-index.ll  49
1 file changed, 49 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/complex-index.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/complex-index.ll
new file mode 100644
index 00000000000..220efd21fe1
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/complex-index.ll
@@ -0,0 +1,49 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s
+
+declare i64 @_Z12get_local_idj(i32)
+
+declare i64 @_Z12get_group_idj(i32)
+
+declare double @llvm.fmuladd.f64(double, double, double)
+
+; CHECK-LABEL: @factorizedVsNonfactorizedAccess(
+; CHECK: load <2 x float>
+; CHECK: store <2 x float>
+define amdgpu_kernel void @factorizedVsNonfactorizedAccess(float addrspace(1)* nocapture %c) {
+entry:
+ %call = tail call i64 @_Z12get_local_idj(i32 0)
+ %call1 = tail call i64 @_Z12get_group_idj(i32 0)
+ %div = lshr i64 %call, 4
+ %div2 = lshr i64 %call1, 3
+ %mul = shl i64 %div2, 7
+ %rem = shl i64 %call, 3
+ %mul3 = and i64 %rem, 120
+ %add = or i64 %mul, %mul3
+ %rem4 = shl i64 %call1, 7
+ %mul5 = and i64 %rem4, 896
+ %mul6 = shl nuw nsw i64 %div, 3
+ %add7 = add nuw i64 %mul5, %mul6
+ %mul9 = shl i64 %add7, 10
+ %add10 = add i64 %mul9, %add
+ %arrayidx = getelementptr inbounds float, float addrspace(1)* %c, i64 %add10
+ %load1 = load float, float addrspace(1)* %arrayidx, align 4
+ %conv = fpext float %load1 to double
+ %mul11 = fmul double %conv, 0x3FEAB481D8F35506
+ %conv12 = fptrunc double %mul11 to float
+ %conv18 = fpext float %conv12 to double
+ %storeval1 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv18)
+ %cstoreval1 = fptrunc double %storeval1 to float
+ store float %cstoreval1, float addrspace(1)* %arrayidx, align 4
+
+ %add23 = or i64 %add10, 1
+ %arrayidx24 = getelementptr inbounds float, float addrspace(1)* %c, i64 %add23
+ %load2 = load float, float addrspace(1)* %arrayidx24, align 4
+ %conv25 = fpext float %load2 to double
+ %mul26 = fmul double %conv25, 0x3FEAB481D8F35506
+ %conv27 = fptrunc double %mul26 to float
+ %conv34 = fpext float %conv27 to double
+ %storeval2 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv34)
+ %cstoreval2 = fptrunc double %storeval2 to float
+ store float %cstoreval2, float addrspace(1)* %arrayidx24, align 4
+ ret void
+}
\ No newline at end of file