diff options
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 34 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll | 53 |
2 files changed, 82 insertions, 5 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c9914fa0f17..ad60b8244f1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -407,6 +407,7 @@ namespace { SDValue getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL, ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, EVT Ty) const; /// This is a helper function for MergeConsecutiveStores. When the source @@ -10817,11 +10818,15 @@ struct BaseIndexOffset { SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL, ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, EVT Ty) const { SmallVector<SDValue, 8> BuildVector; - for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) - BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue()); + for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { + StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode); + Chains.push_back(St->getChain()); + BuildVector.push_back(St->getValue()); + } return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); } @@ -10846,6 +10851,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( LatestNodeUsed = i; } + SmallVector<SDValue, 8> Chains; + // The latest Node in the DAG. LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); @@ -10863,7 +10870,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); if (IsConstantSrc) { - StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty); + StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty); } else { SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumStores; ++i) { @@ -10873,6 +10880,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( if (Val.getValueType() != MemVT) return false; Ops.push_back(Val); + Chains.push_back(St->getChain()); } // Build the extracted vector elements back into a vector. @@ -10892,6 +10900,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( for (unsigned i = 0; i < NumStores; ++i) { unsigned Idx = IsLE ? (NumStores - 1 - i) : i; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); + Chains.push_back(St->getChain()); + SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { @@ -10908,7 +10918,10 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } - SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, + assert(!Chains.empty()); + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, @@ -11360,6 +11373,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; + // Collect the chains from all merged stores. + SmallVector<SDValue, 8> MergeStoreChains; + MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); + // The latest Node in the DAG. unsigned LatestNodeUsed = 0; for (unsigned i=1; i<NumElem; ++i) { @@ -11369,6 +11386,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // latest store node which is *used* and replaced by the wide store. if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) LatestNodeUsed = i; + + MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain()); } LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; @@ -11386,12 +11405,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); + // The merged loads are required to have the same chain, so using the first's + // chain is acceptable. SDValue NewLoad = DAG.getLoad( JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + SDValue NewStoreChain = + DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); + SDValue NewStore = DAG.getStore( - LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); // Replace one of the loads with the new load. diff --git a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll new file mode 100644 index 00000000000..97549b65a99 --- /dev/null +++ b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll @@ -0,0 +1,53 @@ +; REQUIRES: asserts +; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck -check-prefix=X86 %s +; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -debug-only=isel < %s 2>&1 | FileCheck -check-prefix=DBGDAG %s + +; It's OK to merge the load / store of the first 2 components, but +; they must not be placed on the same chain after merging. + +; X86-LABEL: {{^}}merge_store_partial_overlap_load: +; X86-DAG: movw ([[BASEREG:%[a-z]+]]), [[LO2:%[a-z]+]] +; X86-DAG: movb 2([[BASEREG]]), [[HI1:%[a-z]+]] + +; X86-NEXT: movw [[LO2]], 1([[BASEREG]]) +; X86-NEXT: movb [[HI1]], 3([[BASEREG]]) +; X86-NEXT: retq + +; DBGDAG-LABEL: Optimized lowered selection DAG: BB#0 'merge_store_partial_overlap_load:' +; DBGDAG: [[ENTRYTOKEN:t[0-9]+]]: ch = EntryToken +; DBGDAG-DAG: [[TWO:t[0-9]+]]: i64 = Constant<2> +; DBGDAG-DAG: [[BASEPTR:t[0-9]+]]: i64,ch = CopyFromReg [[ENTRYTOKEN]], +; DBGDAG-DAG: [[ADDPTR:t[0-9]+]]: i64 = add [[BASEPTR]], [[TWO]] + +; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load [[ENTRYTOKEN]], [[BASEPTR]], t{{[0-9]+}}<LD2[%tmp81](align=1)> +; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load [[ENTRYTOKEN]], [[ADDPTR]], t{{[0-9]+}}<LD1[%tmp12]> + +; DBGDAG: [[LOADTOKEN:t[0-9]+]]: ch = TokenFactor [[LD2]]:1, [[LD1]]:1 + +; DBGDAG-DAG: [[ST2:t[0-9]+]]: ch = store [[LOADTOKEN]], [[LD2]], t{{[0-9]+}}, t{{[0-9]+}}<ST2[%tmp10](align=1)> +; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store [[ST2]], [[LD1]], t{{[0-9]+}}, t{{[0-9]+}}<ST1[%tmp14]> +; DBGDAG: X86ISD::RET_FLAG [[ST1]], + +; DBGDAG: Type-legalized selection DAG: BB#0 'merge_store_partial_overlap_load:' +define void @merge_store_partial_overlap_load([4 x i8]* %tmp) { + %tmp8 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 0 + %tmp10 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 1 + %tmp12 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 2 + %tmp14 = getelementptr [4 x i8], [4 x i8]* %tmp, i32 0, i8 3 + + %tmp9 = load i8, i8* %tmp8, align 1 ; base + 0 + %tmp11 = load i8, i8* %tmp10, align 1 ; base + 1 + %tmp13 = load i8, i8* %tmp12, align 1 ; base + 2 + + store i8 %tmp9, i8* %tmp10, align 1 ; base + 1 + store i8 %tmp11, i8* %tmp12, align 1 ; base + 2 + store i8 %tmp13, i8* %tmp14, align 1 ; base + 3 + +; Should emit +; load base + 0, base + 1 +; store base + 1, base + 2 +; load base + 2 +; store base + 3 + + ret void +} |

