summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Roman Tereshin <rtereshin@apple.com> 2019-01-18 20:13:42 +0000
committer Roman Tereshin <rtereshin@apple.com> 2019-01-18 20:13:42 +0000
commit 85a0467a11bc748a242a858dfb950fce2956fa79 (patch)
tree 362a299689de464045ab1f596d1acce202915d04
parent d4023bd2cb2606db1a734966b14f865a52c68285 (diff)
downloadbcm5719-llvm-85a0467a11bc748a242a858dfb950fce2956fa79.tar.gz
bcm5719-llvm-85a0467a11bc748a242a858dfb950fce2956fa79.zip
[CGP] Check for existing inttoptr before creating new one
Make sure CodeGenPrepare doesn't emit multiple inttoptr instructions of the same integer value while sinking address computations, but rather CSEs them on the fly: excessive inttoptr's confuse SCEV into thinking that related pointers have nothing to do with each other. This problem blocks LoadStoreVectorizer from vectorizing some of the loads / stores in a downstream target. Reviewed By: hfinkel Differential Revision: https://reviews.llvm.org/D56838 llvm-svn: 351582
-rw-r--r-- llvm/lib/CodeGen/CodeGenPrepare.cpp 17
-rw-r--r-- llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-cse-inttoptrs.ll 40
2 files changed, 53 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index c35f8666fa3..0c7c9deaa07 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4664,13 +4664,22 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// will look through it and provide only the integer value. In that case,
// use it here.
if (!DL->isNonIntegralPointerType(Addr->getType())) {
+ const auto getResultPtr = [MemoryInst, Addr,
+ &Builder](Value *Reg) -> Value * {
+ for (User *U : Reg->users())
+ if (auto *I2P = dyn_cast<IntToPtrInst>(U))
+ if (I2P->getType() == Addr->getType() &&
+ I2P->getParent() == MemoryInst->getParent()) {
+ I2P->moveBefore(MemoryInst->getParent()->getFirstNonPHI());
+ return I2P;
+ }
+ return Builder.CreateIntToPtr(Reg, Addr->getType(), "sunkaddr");
+ };
if (!ResultPtr && AddrMode.BaseReg) {
- ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
- "sunkaddr");
+ ResultPtr = getResultPtr(AddrMode.BaseReg);
AddrMode.BaseReg = nullptr;
} else if (!ResultPtr && AddrMode.Scale == 1) {
- ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
- "sunkaddr");
+ ResultPtr = getResultPtr(AddrMode.ScaledReg);
AddrMode.Scale = 0;
}
}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-cse-inttoptrs.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-cse-inttoptrs.ll
new file mode 100644
index 00000000000..5f1fd98cbb7
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-cse-inttoptrs.ll
@@ -0,0 +1,40 @@
+; RUN: opt -mtriple=x86_64-- -codegenprepare %s -S -o - | FileCheck %s --check-prefix=CGP
+; RUN: opt -mtriple=x86_64-- -codegenprepare -load-store-vectorizer %s -S -o - | FileCheck %s --check-prefix=LSV
+
+; Make sure CodeGenPrepare doesn't emit multiple inttoptr instructions
+; of the same integer value while sinking address computations, but
+; rather CSEs them on the fly: excessive inttoptr's confuse SCEV
+; into thinking that related pointers have nothing to do with each other.
+;
+; Triggering this problem requires just the right addressing modes, and
+; verifying that the motivating pass (LoadStoreVectorizer) is able to
+; benefit from the fix requires just the right LSV policies. Hence the atypical
+; combination of the target and datalayout / address spaces in this test.
+
+target datalayout = "p1:32:32:32"
+
+define void @main(i32 %tmp, i32 %off) {
+; CGP: = inttoptr
+; CGP-NOT: = inttoptr
+; LSV: = load <2 x float>
+; LSV: = load <2 x float>
+entry:
+ %tmp1 = inttoptr i32 %tmp to float addrspace(1)*
+ %arrayidx.i.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 %off
+ %add20.i.7 = add i32 %off, 1
+ %arrayidx22.i.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 %add20.i.7
+ br label %for.body
+
+for.body:
+ %tmp8 = phi float [ undef, %entry ], [ %tmp62, %for.body ]
+ %tmp28 = load float, float addrspace(1)* %arrayidx.i.7
+ %tmp29 = load float, float addrspace(1)* %arrayidx22.i.7
+ %arrayidx.i321.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 0
+ %tmp43 = load float, float addrspace(1)* %arrayidx.i321.7
+ %arrayidx22.i327.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 1
+ %tmp44 = load float, float addrspace(1)* %arrayidx22.i327.7
+ %tmp62 = tail call fast float @foo(float %tmp8, float %tmp44, float %tmp43, float %tmp29, float %tmp28)
+ br label %for.body
+}
+
+declare float @foo(float, float, float, float, float)
OpenPOWER on IntegriCloud