diff options
author | Suyog Sarda <suyog.sarda@samsung.com> | 2014-12-12 12:53:44 +0000 |
---|---|---|
committer | Suyog Sarda <suyog.sarda@samsung.com> | 2014-12-12 12:53:44 +0000 |
commit | 384095e65c9d383309d876d2b0595bd976215da3 (patch) | |
tree | 909980fd0a0bd7ec88fe661e9c76a5be5c556154 /llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | |
parent | 01236e3eca6b7fba347fb6b88c6fdcd7624d45ad (diff) | |
download | bcm5719-llvm-384095e65c9d383309d876d2b0595bd976215da3.tar.gz bcm5719-llvm-384095e65c9d383309d876d2b0595bd976215da3.zip |
This patch recognizes the pattern (+ (+ v0, v1) (+ v2, v3)), reorders its operands so that
consecutive loads can be bundled into a vector of loads, and vectorizes it.
Test case:
float hadd(float* a) {
return (a[0] + a[1]) + (a[2] + a[3]);
}
AArch64 assembly before the patch:
ldp s0, s1, [x0]
ldp s2, s3, [x0, #8]
fadd s0, s0, s1
fadd s1, s2, s3
fadd s0, s0, s1
ret
AArch64 assembly after the patch:
ldp d0, d1, [x0]
fadd v0.2s, v0.2s, v1.2s
faddp s0, v0.2s
ret
Review link: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20141208/248531.html
llvm-svn: 224119
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 26 |
1 files changed, 24 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 44bfea14670..dafda21da8d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -439,6 +439,13 @@ public: /// \returns true if the memory operations A and B are consecutive. bool isConsecutiveAccess(Value *A, Value *B); + /// For consecutive loads (+(+ v0, v1)(+ v2, v3)), Left had v0 and v2 + /// while Right had v1 and v3, which prevented bundling them into + /// a vector of loads. Rorder them so that Left now has v0 and v1 + /// while Right has v2 and v3 enabling their bundling into a vector. + void reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left, + SmallVectorImpl<Value *> &Right); + /// \brief Perform LICM and CSE on the newly generated gather sequences. void optimizeGatherSequence(); @@ -1234,6 +1241,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) { ValueList Left, Right; reorderInputsAccordingToOpcode(VL, Left, Right); + reorderIfConsecutiveLoads (Left, Right); buildTree_rec(Left, Depth + 1); buildTree_rec(Right, Depth + 1); return; @@ -1818,6 +1826,19 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { return X == PtrSCEVB; } +void BoUpSLP::reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left, + SmallVectorImpl<Value *> &Right) { + for (unsigned i = 0, e = Left.size(); i < e - 1; ++i) { + if (!isa<LoadInst>(Left[i]) || !isa<LoadInst>(Right[i])) + return; + if (!(isConsecutiveAccess(Left[i], Right[i]) && + isConsecutiveAccess(Right[i], Left[i + 1]))) + continue; + else + std::swap(Left[i + 1], Right[i]); + } +} + void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) { Instruction *VL0 = cast<Instruction>(VL[0]); BasicBlock::iterator NextInst = VL0; @@ -2048,9 +2069,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Or: case Instruction::Xor: { ValueList LHSVL, 
RHSVL; - if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) + if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) { reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL); - else + reorderIfConsecutiveLoads(LHSVL, RHSVL); + } else for (int i = 0, e = E->Scalars.size(); i < e; ++i) { LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); |