diff options
author | Michael Kruse <llvm@meinersbur.de> | 2018-12-20 04:58:07 +0000 |
---|---|---|
committer | Michael Kruse <llvm@meinersbur.de> | 2018-12-20 04:58:07 +0000 |
commit | 978ba61536c2cdafa8454b7330c5d8e58d0d5048 (patch) | |
tree | a99ca94692acf53f47844710992a914436b56747 /llvm/lib/Transforms | |
parent | feb18fe927d33e1d5a7bd873451ac9a1a76c7141 (diff) | |
download | bcm5719-llvm-978ba61536c2cdafa8454b7330c5d8e58d0d5048.tar.gz bcm5719-llvm-978ba61536c2cdafa8454b7330c5d8e58d0d5048.zip |
Introduce llvm.loop.parallel_accesses and llvm.access.group metadata.
The current llvm.mem.parallel_loop_access metadata has a problem in that
it uses LoopIDs. LoopID unfortunately is not a loop identifier. It is
neither unique (there's even a regression test assigning the same LoopID
to multiple loops; can otherwise happen if passes such as LoopVersioning
make copies of entire loops) nor persistent (every time a property is
removed/added from a LoopID's MDNode, it will also receive a new LoopID;
this happens e.g. when calling Loop::setLoopAlreadyUnrolled()).
Since most loop transformation passes change the loop attributes (even
if it is just to mark that a loop should not be processed again as
llvm.loop.isvectorized does, for the versioned and unversioned loop),
the parallel access information is lost for any subsequent pass.
This patch unlinks LoopIDs and parallel accesses.
llvm.mem.parallel_loop_access metadata on instructions is replaced by
llvm.access.group metadata. llvm.access.group points to a distinct
MDNode with no operands (avoiding the need to ever add/remove
operands), called "access group". Alternatively, it can point to a list
of access groups. The LoopID then has an attribute
llvm.loop.parallel_accesses with all the access groups that are parallel
(no dependencies carried by this loop).
This intentionally avoids any kind of "ID". Loops that are clones/have
their attributes modified retain the llvm.loop.parallel_accesses
attribute. Access instructions that are cloned point to the same access
group. It is not necessary for each access to have its own "ID" MDNode,
but those memory access instructions with the same behavior can be
grouped together.
The behavior of llvm.mem.parallel_loop_access is not changed by this
patch, but should be considered deprecated.
Differential Revision: https://reviews.llvm.org/D52116
llvm-svn: 349725
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/GVNHoist.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/SROA.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/Scalarizer.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/InlineFunction.cpp | 26 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/Local.cpp | 11 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopUtils.cpp | 49 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 3 |
12 files changed, 59 insertions, 59 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index ae158aeabdf..3e6a4965336 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -174,6 +174,9 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access); if (LoopMemParallelMD) L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD); + MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group); + if (AccessGroupMD) + L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD); StoreInst *S = Builder.CreateStore(L, Dest); // Alignment from the mem intrinsic will be better, so use it. @@ -182,6 +185,8 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { S->setMetadata(LLVMContext::MD_tbaa, CopyMD); if (LoopMemParallelMD) S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD); + if (AccessGroupMD) + S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD); if (auto *MT = dyn_cast<MemTransferInst>(MI)) { // non-atomics can be volatile diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 02ebb5ef294..b9b1e7b6e8a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -493,6 +493,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT case LLVMContext::MD_noalias: case LLVMContext::MD_nontemporal: case LLVMContext::MD_mem_parallel_loop_access: + case LLVMContext::MD_access_group: // All of these directly apply. 
NewLoad->setMetadata(ID, N); break; diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index a71ebdcd346..7603cf4d795 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -608,6 +608,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { LLVMContext::MD_align, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_access_group, }; for (unsigned ID : KnownIDs) diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp index 0797ce9adea..76a42d7fe75 100644 --- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -246,7 +246,7 @@ static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) { LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, LLVMContext::MD_range, LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, - LLVMContext::MD_invariant_group}; + LLVMContext::MD_invariant_group, LLVMContext::MD_access_group}; combineMetadata(ReplInst, I, KnownIDs, true); } diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index c0c59d24dff..f48d3cc098a 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -633,6 +633,8 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Set Loop Versioning metaData for version loop. addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData); // Set "llvm.mem.parallel_loop_access" metaData to versioned loop. + // FIXME: "llvm.mem.parallel_loop_access" annotates memory access + // instructions, not loops. addStringMetadataToLoop(LVer.getVersionedLoop(), "llvm.mem.parallel_loop_access"); // Update version loop with aggressive aliasing assumption. 
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 8756a1afcdd..e93f56da454 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -996,7 +996,8 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, // handled here, but combineMetadata doesn't support them yet unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, - LLVMContext::MD_invariant_group}; + LLVMContext::MD_invariant_group, + LLVMContext::MD_access_group}; combineMetadata(C, cpy, KnownIDs, true); // Remove the memcpy. diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 81eea0dee4e..d1a2dc0f89b 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2593,7 +2593,8 @@ private: } V = convertValue(DL, IRB, V, NewAllocaTy); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); - Store->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access); + Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_access_group}); if (AATags) Store->setAAMetadata(AATags); Pass.DeadInsts.insert(&SI); @@ -2662,7 +2663,8 @@ private: NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()), SI.isVolatile()); } - NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access); + NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_access_group}); if (AATags) NewSI->setAAMetadata(AATags); if (SI.isVolatile()) @@ -3799,7 +3801,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); - PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); + PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access, + 
LLVMContext::MD_access_group}); // Append this load onto the list of split loads so we can find it later // to rewrite the stores. @@ -3855,7 +3858,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { APInt(DL.getIndexSizeInBits(AS), PartOffset), PartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); - PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); + PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_access_group}); LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); } diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 3a6f8e6b095..5eb3fdab6d5 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -379,7 +379,8 @@ bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) { || Tag == LLVMContext::MD_invariant_load || Tag == LLVMContext::MD_alias_scope || Tag == LLVMContext::MD_noalias - || Tag == ParallelLoopAccessMDKind); + || Tag == ParallelLoopAccessMDKind + || Tag == LLVMContext::MD_access_group); } // Transfer metadata from Op to the instructions in CV if it is known diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index bda2ee2d8a3..0b220828d16 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -770,14 +771,16 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, UnwindDest->removePredecessor(InvokeBB); } -/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata, -/// that metadata 
should be propagated to all memory-accessing cloned -/// instructions. +/// When inlining a call site that has !llvm.mem.parallel_loop_access or +/// llvm.access.group metadata, that metadata should be propagated to all +/// memory-accessing cloned instructions. static void PropagateParallelLoopAccessMetadata(CallSite CS, ValueToValueMapTy &VMap) { MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access); - if (!M) + MDNode *CallAccessGroup = + CS.getInstruction()->getMetadata(LLVMContext::MD_access_group); + if (!M && !CallAccessGroup) return; for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); @@ -789,11 +792,20 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS, if (!NI) continue; - if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) { + if (M) { + if (MDNode *PM = + NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) { M = MDNode::concatenate(PM, M); NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); - } else if (NI->mayReadOrWriteMemory()) { - NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); + } else if (NI->mayReadOrWriteMemory()) { + NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); + } + } + + if (NI->mayReadOrWriteMemory()) { + MDNode *UnitedAccGroups = uniteAccessGroups( + NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup); + NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups); } } } diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 79343a90a0e..831fb5751de 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -34,6 +34,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -2297,6 +2298,10 @@ void 
llvm::combineMetadata(Instruction *K, const Instruction *J, case LLVMContext::MD_mem_parallel_loop_access: K->setMetadata(Kind, MDNode::intersect(JMD, KMD)); break; + case LLVMContext::MD_access_group: + K->setMetadata(LLVMContext::MD_access_group, + intersectAccessGroups(K, J)); + break; case LLVMContext::MD_range: // If K does move, use most generic range. Otherwise keep the range of @@ -2353,7 +2358,8 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, LLVMContext::MD_invariant_group, LLVMContext::MD_align, LLVMContext::MD_dereferenceable, - LLVMContext::MD_dereferenceable_or_null}; + LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_access_group}; combineMetadata(K, J, KnownIDs, KDominatesJ); } @@ -2384,7 +2390,8 @@ void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) { LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, LLVMContext::MD_range, LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, - LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull}; + LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull, + LLVMContext::MD_access_group}; combineMetadata(ReplInst, I, KnownIDs, false); } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 1d4f07ff1b9..3866395a32c 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -187,44 +187,14 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) } -static Optional<MDNode *> findOptionMDForLoopID(MDNode *LoopID, - StringRef Name) { - // Return none if LoopID is false. - if (!LoopID) - return None; - - // First operand should refer to the loop id itself. 
- assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); - assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); - - // Iterate over LoopID operands and look for MDString Metadata - for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { - MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); - if (!MD) - continue; - MDString *S = dyn_cast<MDString>(MD->getOperand(0)); - if (!S) - continue; - // Return true if MDString holds expected MetaData. - if (Name.equals(S->getString())) - return MD; - } - return None; -} - -static Optional<MDNode *> findOptionMDForLoop(const Loop *TheLoop, - StringRef Name) { - return findOptionMDForLoopID(TheLoop->getLoopID(), Name); -} - /// Find string metadata for loop /// /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an /// operand or null otherwise. If the string metadata is not found return /// Optional's not-a-value. -Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, +Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop, StringRef Name) { - auto MD = findOptionMDForLoop(TheLoop, Name).getValueOr(nullptr); + MDNode *MD = findOptionMDForLoop(TheLoop, Name); if (!MD) return None; switch (MD->getNumOperands()) { @@ -239,19 +209,15 @@ Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop, StringRef Name) { - Optional<MDNode *> MD = findOptionMDForLoop(TheLoop, Name); - if (!MD.hasValue()) - return None; - MDNode *OptionNode = MD.getValue(); - if (OptionNode == nullptr) + MDNode *MD = findOptionMDForLoop(TheLoop, Name); + if (!MD) return None; - switch (OptionNode->getNumOperands()) { + switch (MD->getNumOperands()) { case 1: // When the value is absent it is interpreted as 'attribute set'. 
return true; case 2: - return mdconst::extract_or_null<ConstantInt>( - OptionNode->getOperand(1).get()); + return mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()); } llvm_unreachable("unexpected number of options"); } @@ -325,8 +291,7 @@ Optional<MDNode *> llvm::makeFollowupLoopID( bool HasAnyFollowup = false; for (StringRef OptionName : FollowupOptions) { - MDNode *FollowupNode = - findOptionMDForLoopID(OrigLoopID, OptionName).getValueOr(nullptr); + MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName); if (!FollowupNode) continue; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b98f2fff65e..ac09ce5a347 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1321,7 +1321,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, LLVMContext::MD_align, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, - LLVMContext::MD_mem_parallel_loop_access}; + LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_access_group}; combineMetadata(I1, I2, KnownIDs, true); // I1 and I2 are being combined into a single instruction. Its debug |