summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 34
-rw-r--r-- llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll 53
2 files changed, 82 insertions, 5 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c9914fa0f17..ad60b8244f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -407,6 +407,7 @@ namespace {
SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
SDLoc SL,
ArrayRef<MemOpLink> Stores,
+ SmallVectorImpl<SDValue> &Chains,
EVT Ty) const;
/// This is a helper function for MergeConsecutiveStores. When the source
@@ -10817,11 +10818,15 @@ struct BaseIndexOffset {
SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
SDLoc SL,
ArrayRef<MemOpLink> Stores,
+ SmallVectorImpl<SDValue> &Chains,
EVT Ty) const {
SmallVector<SDValue, 8> BuildVector;
- for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I)
- BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue());
+ for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
+ Chains.push_back(St->getChain());
+ BuildVector.push_back(St->getValue());
+ }
return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
}
@@ -10846,6 +10851,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
LatestNodeUsed = i;
}
+ SmallVector<SDValue, 8> Chains;
+
// The latest Node in the DAG.
LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
@@ -10863,7 +10870,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
if (IsConstantSrc) {
- StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
+ StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
@@ -10873,6 +10880,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
if (Val.getValueType() != MemVT)
return false;
Ops.push_back(Val);
+ Chains.push_back(St->getChain());
}
// Build the extracted vector elements back into a vector.
@@ -10892,6 +10900,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
for (unsigned i = 0; i < NumStores; ++i) {
unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ Chains.push_back(St->getChain());
+
SDValue Val = St->getValue();
StoreInt <<= ElementSizeBytes * 8;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
@@ -10908,7 +10918,10 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
- SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal,
+ assert(!Chains.empty());
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
@@ -11360,6 +11373,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (NumElem < 2)
return false;
+ // Collect the chains from all merged stores.
+ SmallVector<SDValue, 8> MergeStoreChains;
+ MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
+
// The latest Node in the DAG.
unsigned LatestNodeUsed = 0;
for (unsigned i=1; i<NumElem; ++i) {
@@ -11369,6 +11386,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// latest store node which is *used* and replaced by the wide store.
if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
LatestNodeUsed = i;
+
+ MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
}
LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
@@ -11386,12 +11405,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
+ // The merged loads are required to have the same chain, so using the first's
+ // chain is acceptable.
SDValue NewLoad = DAG.getLoad(
JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
+ SDValue NewStoreChain =
+ DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
+
SDValue NewStore = DAG.getStore(
- LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
// Replace one of the loads with the new load.
diff --git a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
new file mode 100644
index 00000000000..97549b65a99
--- /dev/null
+++ b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
@@ -0,0 +1,53 @@
+; REQUIRES: asserts
+; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck -check-prefix=X86 %s
+; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -debug-only=isel < %s 2>&1 | FileCheck -check-prefix=DBGDAG %s
+
+; It's OK to merge the load / store of the first 2 components, but
+; they must not be placed on the same chain after merging.
+
+; X86-LABEL: {{^}}merge_store_partial_overlap_load:
+; X86-DAG: movw ([[BASEREG:%[a-z]+]]), [[LO2:%[a-z]+]]
+; X86-DAG: movb 2([[BASEREG]]), [[HI1:%[a-z]+]]
+
+; X86-NEXT: movw [[LO2]], 1([[BASEREG]])
+; X86-NEXT: movb [[HI1]], 3([[BASEREG]])
+; X86-NEXT: retq
+
+; DBGDAG-LABEL: Optimized lowered selection DAG: BB#0 'merge_store_partial_overlap_load:'
+; DBGDAG: [[ENTRYTOKEN:t[0-9]+]]: ch = EntryToken
+; DBGDAG-DAG: [[TWO:t[0-9]+]]: i64 = Constant<2>
+; DBGDAG-DAG: [[BASEPTR:t[0-9]+]]: i64,ch = CopyFromReg [[ENTRYTOKEN]],
+; DBGDAG-DAG: [[ADDPTR:t[0-9]+]]: i64 = add [[BASEPTR]], [[TWO]]
+
+; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load [[ENTRYTOKEN]], [[BASEPTR]], t{{[0-9]+}}<LD2[%tmp81](align=1)>
+; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load [[ENTRYTOKEN]], [[ADDPTR]], t{{[0-9]+}}<LD1[%tmp12]>
+
+; DBGDAG: [[LOADTOKEN:t[0-9]+]]: ch = TokenFactor [[LD2]]:1, [[LD1]]:1
+
+; DBGDAG-DAG: [[ST2:t[0-9]+]]: ch = store [[LOADTOKEN]], [[LD2]], t{{[0-9]+}}, t{{[0-9]+}}<ST2[%tmp10](align=1)>
+; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store [[ST2]], [[LD1]], t{{[0-9]+}}, t{{[0-9]+}}<ST1[%tmp14]>
+; DBGDAG: X86ISD::RET_FLAG [[ST1]],
+
+; DBGDAG: Type-legalized selection DAG: BB#0 'merge_store_partial_overlap_load:'
+define void @merge_store_partial_overlap_load([4 x i8]* %tmp) {
+ %tmp8 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 0
+ %tmp10 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 1
+ %tmp12 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 2
+ %tmp14 = getelementptr [4 x i8], [4 x i8]* %tmp, i32 0, i8 3
+
+ %tmp9 = load i8, i8* %tmp8, align 1 ; base + 0
+ %tmp11 = load i8, i8* %tmp10, align 1 ; base + 1
+ %tmp13 = load i8, i8* %tmp12, align 1 ; base + 2
+
+ store i8 %tmp9, i8* %tmp10, align 1 ; base + 1
+ store i8 %tmp11, i8* %tmp12, align 1 ; base + 2
+ store i8 %tmp13, i8* %tmp14, align 1 ; base + 3
+
+; Should emit
+; load base + 0, base + 1
+; store base + 1, base + 2
+; load base + 2
+; store base + 3
+
+ ret void
+}
OpenPOWER on IntegriCloud