diff options
author | Cong Hou <congh@google.com> | 2016-02-24 23:40:36 +0000 |
---|---|---|
committer | Cong Hou <congh@google.com> | 2016-02-24 23:40:36 +0000 |
commit | 4ce0280a416c5aa9121e98652d3bfaf166d9ee78 (patch) | |
tree | ab0cea9abd439dfed12a970fc0349cc0735fbbca /llvm/lib/CodeGen/SelectionDAG | |
parent | c445d387769d05753f678f6fb88403e2e1b18710 (diff) | |
download | bcm5719-llvm-4ce0280a416c5aa9121e98652d3bfaf166d9ee78.tar.gz bcm5719-llvm-4ce0280a416c5aa9121e98652d3bfaf166d9ee78.zip |
Detecte vector reduction operations just before instruction selection.
(This is the second attemp to commit this patch, after fixing pr26652 & pr26653).
This patch detects vector reductions before instruction selection. Vector
reductions are vectorized reduction operations, and for such operations we have
freedom to reorganize the elements of the result as long as the reduction of them
stay unchanged. This will enable some reduction pattern recognition during
instruction combine such as SAD/dot-product on X86. A flag is added to
SDNodeFlags to mark those vector reduction nodes to be checked during instruction
combine.
To detect those vector reductions, we search def-use chains starting from the
given instruction, and check if all uses fall into two categories:
1. Reduction with another vector.
2. Reduction on all elements.
in which 2 is detected by recognizing the pattern that the loop vectorizer
generates to reduce all elements in the vector outside of the loop, which
includes several ShuffleVector and one ExtractElement instructions.
Differential revision: http://reviews.llvm.org/D15250
llvm-svn: 261804
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 130 |
1 files changed, 130 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 27a131e4243..ec25d7c5ecc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2317,6 +2317,129 @@ void SelectionDAGBuilder::visitFSub(const User &I) { visitBinary(I, ISD::FSUB); } +/// Checks if the given instruction performs a vector reduction, in which case +/// we have the freedom to alter the elements in the result as long as the +/// reduction of them stays unchanged. +static bool isVectorReductionOp(const User *I) { + const Instruction *Inst = dyn_cast<Instruction>(I); + if (!Inst || !Inst->getType()->isVectorTy()) + return false; + + auto OpCode = Inst->getOpcode(); + switch (OpCode) { + case Instruction::Add: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + break; + case Instruction::FAdd: + case Instruction::FMul: + if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) + if (FPOp->getFastMathFlags().unsafeAlgebra()) + break; + // Fall through. + default: + return false; + } + + unsigned ElemNum = Inst->getType()->getVectorNumElements(); + unsigned ElemNumToReduce = ElemNum; + + // Do DFS search on the def-use chain from the given instruction. We only + // allow four kinds of operations during the search until we reach the + // instruction that extracts the first element from the vector: + // + // 1. The reduction operation of the same opcode as the given instruction. + // + // 2. PHI node. + // + // 3. ShuffleVector instruction together with a reduction operation that + // does a partial reduction. + // + // 4. ExtractElement that extracts the first element from the vector, and we + // stop searching the def-use chain here. + // + // 3 & 4 above perform a reduction on all elements of the vector. We push defs + // from 1-3 to the stack to continue the DFS. The given instruction is not + // a reduction operation if we meet any other instructions other than those + // listed above. + + SmallVector<const User *, 16> UsersToVisit{Inst}; + SmallPtrSet<const User *, 16> Visited; + bool ReduxExtracted = false; + + while (!UsersToVisit.empty()) { + auto User = UsersToVisit.back(); + UsersToVisit.pop_back(); + if (!Visited.insert(User).second) + continue; + + for (const auto &U : User->users()) { + auto Inst = dyn_cast<Instruction>(U); + if (!Inst) + return false; + + if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) { + if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) + if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra()) + return false; + UsersToVisit.push_back(U); + } else if (const ShuffleVectorInst *ShufInst = + dyn_cast<ShuffleVectorInst>(U)) { + // Detect the following pattern: A ShuffleVector instruction together + // with a reduction that do partial reduction on the first and second + // ElemNumToReduce / 2 elements, and store the result in + // ElemNumToReduce / 2 elements in another vector. + + unsigned ResultElements = ShufInst->getType()->getVectorNumElements(); + ElemNumToReduce = ResultElements <= ElemNumToReduce ? ResultElements + : ElemNumToReduce; + if (ElemNumToReduce == 1) + return false; + if (!isa<UndefValue>(U->getOperand(1))) + return false; + for (unsigned i = 0; i < ElemNumToReduce / 2; ++i) + if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2)) + return false; + for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i) + if (ShufInst->getMaskValue(i) != -1) + return false; + + // There is only one user of this ShuffleVector instruction, which + // must + // be a reduction operation. + if (!U->hasOneUse()) + return false; + + auto U2 = dyn_cast<Instruction>(*U->user_begin()); + if (!U2 || U2->getOpcode() != OpCode) + return false; + + // Check operands of the reduction operation. + if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) || + (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) { + UsersToVisit.push_back(U2); + ElemNumToReduce /= 2; + } else + return false; + } else if (isa<ExtractElementInst>(U)) { + // At this moment we should have reduced all elements in the vector. + if (ElemNumToReduce != 1) + return false; + + const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1)); + if (!Val || Val->getZExtValue() != 0) + return false; + + ReduxExtracted = true; + } else + return false; + } + } + return ReduxExtracted; +} + void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -2324,6 +2447,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { bool nuw = false; bool nsw = false; bool exact = false; + bool vec_redux = false; FastMathFlags FMF; if (const OverflowingBinaryOperator *OFBinOp = @@ -2337,10 +2461,16 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I)) FMF = FPOp->getFastMathFlags(); + if (isVectorReductionOp(&I)) { + vec_redux = true; + DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); + } + SDNodeFlags Flags; Flags.setExact(exact); Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); + Flags.setVectorReduction(vec_redux); if (EnableFMFInDAG) { Flags.setAllowReciprocal(FMF.allowReciprocal()); Flags.setNoInfs(FMF.noInfs()); |