diff options
author | Wei Mi <wmi@google.com> | 2016-12-22 19:44:45 +0000 |
---|---|---|
committer | Wei Mi <wmi@google.com> | 2016-12-22 19:44:45 +0000 |
commit | a2f0b594c26b233044006264faec2657f1756963 (patch) | |
tree | c5d54b1fa3875d5a400d47b4546169163aef1dfe /llvm/lib/CodeGen/CodeGenPrepare.cpp | |
parent | 5e87c3465ec262cab99f1f832e0be1d0076c201c (diff) | |
download | bcm5719-llvm-a2f0b594c26b233044006264faec2657f1756963.tar.gz bcm5719-llvm-a2f0b594c26b233044006264faec2657f1756963.zip |
Redo store splitting in CodeGenPrepare.
This is a follow-up patch to https://reviews.llvm.org/D22840 that addresses the
case where a value to be merged into an int64 pair is in a different BB. Redo
the store splitting in CodeGenPrepare so that we can match the pattern across
multiple BBs and move some instructions into the same BB. We still keep the code
in DAG combine so that we can catch cases that show up after DAG combining runs.
Differential Revision: https://reviews.llvm.org/D25914
llvm-svn: 290365
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 3dea9eb6b1f..934b470f13b 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -131,6 +131,10 @@ static cl::opt<unsigned> FreqRatioToSkipMerge(
     cl::desc("Skip merging empty blocks if (frequency of empty block) / "
              "(frequency of destination block) is greater than this ratio"));
 
+static cl::opt<bool> ForceSplitStore(
+    "force-split-store", cl::Hidden, cl::init(false),
+    cl::desc("Force store splitting no matter what the target query says."));
+
 namespace {
 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
 typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
@@ -5358,6 +5362,147 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
   return false;
 }
 
+/// For the instruction sequence of store below, F and I values
+/// are bundled together as an i64 value before being stored into memory.
+/// Sometimes it is more efficient to generate separate stores for F and I,
+/// which can remove the bitwise instructions or sink them to colder places.
+///
+///   (store (or (zext (bitcast F to i32) to i64),
+///              (shl (zext I to i64), 32)), addr)  -->
+///   (store F, addr) and (store I, addr+4)
+///
+/// Similarly, splitting for other merged store can also be beneficial, like:
+/// For pair of {i32, i32}, i64 store --> two i32 stores.
+/// For pair of {i32, i16}, i64 store --> two i32 stores.
+/// For pair of {i16, i16}, i32 store --> two i16 stores.
+/// For pair of {i16, i8},  i32 store --> two i16 stores.
+/// For pair of {i8, i8},   i16 store --> two i8 stores.
+///
+/// We allow each target to determine specifically which kind of splitting is
+/// supported.
+///
+/// The store patterns are commonly seen from the simple code snippet below
+/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
+///   void goo(const std::pair<int, float> &);
+///   hoo() {
+///     ...
+///     goo(std::make_pair(tmp, ftmp));
+///     ...
+///   }
+///
+/// Although we already have similar splitting in DAG Combine, we duplicate
+/// it in CodeGenPrepare to catch the case in which pattern is across
+/// multiple BBs. The logic in DAG Combine is kept to catch case generated
+/// during code expansion.
+///
+/// The (store F, addr) / (store I, addr+4) placement above is for
+/// little-endian targets; on big-endian targets the two halves swap places.
+///
+/// \returns true if \p SI was replaced by two narrower stores and erased
+/// (the caller must not touch it afterwards), false if nothing was changed.
+static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
+                                const TargetLowering &TLI) {
+  // Splitting would turn one access into two and drop the ordering, which is
+  // not valid for volatile or atomic stores. Only handle simple stores.
+  if (!SI.isSimple())
+    return false;
+
+  // Handle simple but common cases only.
+  Type *StoreType = SI.getValueOperand()->getType();
+  if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
+      DL.getTypeSizeInBits(StoreType) == 0)
+    return false;
+
+  unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
+  Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
+  // The half at the higher address is reached with a GEP over SplitStoreType,
+  // which advances by the type's alloc size, so also require the half type to
+  // have no alloc padding (e.g. reject i24 halves of an i48 store).
+  if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
+          DL.getTypeSizeInBits(SplitStoreType) ||
+      DL.getTypeAllocSizeInBits(SplitStoreType) !=
+          DL.getTypeSizeInBits(SplitStoreType))
+    return false;
+
+  // Match the following pattern, with the operands of OR in either order
+  // (i64 and 32 are illustrative; the shift amount is HalfValBitSize):
+  //   (store (or (zext LValue to i64),
+  //              (shl (zext HValue to i64), 32)), addr)
+  // Expect both operands of OR and the first operand of SHL have only
+  // one use.
+  Value *LValue, *HValue;
+  if (!match(SI.getValueOperand(),
+             m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
+                    m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
+                                   m_SpecificInt(HalfValBitSize))))))
+    return false;
+
+  // Check LValue and HValue are integers no wider than half of the store.
+  if (!LValue->getType()->isIntegerTy() ||
+      DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
+      !HValue->getType()->isIntegerTy() ||
+      DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
+    return false;
+
+  // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
+  // as the input of target query.
+  auto *LBC = dyn_cast<BitCastInst>(LValue);
+  auto *HBC = dyn_cast<BitCastInst>(HValue);
+  EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
+                  : EVT::getEVT(LValue->getType());
+  EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
+                   : EVT::getEVT(HValue->getType());
+  if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
+    return false;
+
+  // Start to split store.
+  IRBuilder<> Builder(SI.getContext());
+  Builder.SetInsertPoint(&SI);
+
+  // If LValue/HValue is a bitcast in another BB, create a new one in current
+  // BB so it may be merged with the split stores by dag combiner.
+  if (LBC && LBC->getParent() != SI.getParent())
+    LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
+  if (HBC && HBC->getParent() != SI.getParent())
+    HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
+
+  // An alignment of 0 on the original store means the ABI alignment of the
+  // stored type; make it explicit so it is not reinterpreted as the (possibly
+  // different) ABI alignment of the narrower type.
+  unsigned BaseAlign = SI.getAlignment();
+  if (BaseAlign == 0)
+    BaseAlign = DL.getABITypeAlignment(StoreType);
+  // The half at the higher address is HalfValBitSize / 8 bytes past the base,
+  // so it is only known to be aligned to the largest power of two dividing
+  // both the base alignment and that offset (isolate the lowest set bit).
+  unsigned AlignOrOffset = BaseAlign | (HalfValBitSize / 8);
+  unsigned OffsetAlign = AlignOrOffset & (~AlignOrOffset + 1);
+
+  const bool IsLE = DL.isLittleEndian();
+  auto CreateSplitStore = [&](Value *V, bool Upper) {
+    V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
+    Value *Addr = Builder.CreateBitCast(
+        SI.getOperand(1),
+        SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
+    // The upper half lives at the higher address on little-endian targets
+    // and at the lower address on big-endian targets.
+    const bool IsOffsetStore = (Upper == IsLE);
+    if (IsOffsetStore)
+      Addr = Builder.CreateGEP(
+          SplitStoreType, Addr,
+          ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
+    Builder.CreateAlignedStore(V, Addr,
+                               IsOffsetStore ? OffsetAlign : BaseAlign);
+  };
+
+  CreateSplitStore(LValue, false);
+  CreateSplitStore(HValue, true);
+
+  // Delete the old store.
+  SI.eraseFromParent();
+  return true;
+}
+
 bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
@@ -5422,6 +5567,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
   }
 
   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    if (TLI && splitMergedValStore(*SI, *DL, *TLI))
+      return true;
     SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
     if (TLI) {
       unsigned AS = SI->getPointerAddressSpace();