diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 79 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/MergeConsecutiveStores.ll | 33 |
2 files changed, 108 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8297e841469..dc556fa63df 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9498,11 +9498,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { return false; // Perform an early exit check. Do not bother looking at stored values that - // are not constants or loads. + // are not constants, loads, or extracted vector elements. SDValue StoredVal = St->getValue(); bool IsLoadSrc = isa<LoadSDNode>(StoredVal); - if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && - !IsLoadSrc) + bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || + isa<ConstantFPSDNode>(StoredVal); + bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) return false; // Only look at ends of store sequences. @@ -9644,7 +9647,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; // Store the constants into memory as one consecutive store. - if (!IsLoadSrc) { + if (IsConstantSrc) { unsigned LastLegalType = 0; unsigned LastLegalVectorType = 0; bool NonZero = false; @@ -9774,6 +9777,74 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { return true; } + // When extracting multiple vector elements, try to store them + // in one vector store rather than a sequence of scalar stores. + if (IsExtractVecEltSrc) { + unsigned NumElem = 0; + for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(Ty)) + NumElem = i + 1; + } + + // Make sure we have a legal type and something to merge. + if (NumElem < 2) + return false; + + unsigned EarliestNodeUsed = 0; + for (unsigned i=0; i < NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // earliest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) + EarliestNodeUsed = i; + } + + // The earliest Node in the DAG. + LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + SDLoc DL(StoreNodes[0].MemNode); + + SDValue StoredVal; + + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElem ; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue Val = St->getValue(); + // All of the operands of a BUILD_VECTOR must have the same type. + if (Val.getValueType() != MemVT) + return false; + Ops.push_back(Val); + } + + // Build the extracted vector elements back into a vector. + StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); + + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + false, false, + FirstInChain->getAlignment()); + + // Replace the first store with the new store + CombineTo(EarliestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + if (StoreNodes[i].MemNode == EarliestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } + + return true; + } + // Below we handle the case of multiple consecutive stores that // come from multiple consecutive loads. We merge them into a single // wide load and a single wide store. diff --git a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll index dfdaea523fd..cf984a4f3a9 100644 --- a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll +++ b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -434,3 +434,36 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) { ; <label>:14 ret void } + +define void @merge_vec_element_store(<8 x float> %v, float* %ptr) { + %vecext0 = extractelement <8 x float> %v, i32 0 + %vecext1 = extractelement <8 x float> %v, i32 1 + %vecext2 = extractelement <8 x float> %v, i32 2 + %vecext3 = extractelement <8 x float> %v, i32 3 + %vecext4 = extractelement <8 x float> %v, i32 4 + %vecext5 = extractelement <8 x float> %v, i32 5 + %vecext6 = extractelement <8 x float> %v, i32 6 + %vecext7 = extractelement <8 x float> %v, i32 7 + %arrayidx1 = getelementptr inbounds float* %ptr, i64 1 + %arrayidx2 = getelementptr inbounds float* %ptr, i64 2 + %arrayidx3 = getelementptr inbounds float* %ptr, i64 3 + %arrayidx4 = getelementptr inbounds float* %ptr, i64 4 + %arrayidx5 = getelementptr inbounds float* %ptr, i64 5 + %arrayidx6 = getelementptr inbounds float* %ptr, i64 6 + %arrayidx7 = getelementptr inbounds float* %ptr, i64 7 + store float %vecext0, float* %ptr, align 4 + store float %vecext1, float* %arrayidx1, align 4 + store float %vecext2, float* %arrayidx2, align 4 + store float %vecext3, float* %arrayidx3, align 4 + store float %vecext4, float* %arrayidx4, align 4 + store float %vecext5, float* %arrayidx5, align 4 + store float %vecext6, float* %arrayidx6, align 4 + store float %vecext7, float* %arrayidx7, align 4 + ret void + +; CHECK-LABEL: merge_vec_element_store +; CHECK: vmovups +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +} + |