diff options
| author | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2016-07-29 15:15:35 +0000 |
|---|---|---|
| committer | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2016-07-29 15:15:35 +0000 |
| commit | 0006e1afddba3200687fa1f358436aff6dc25f76 (patch) | |
| tree | 7d7d0c44099f0f14bab2dc3b74c4f152ca31148b /llvm/lib | |
| parent | c3cb884c8e192eb81638425ff1e2c60388d70c0d (diff) | |
| download | bcm5719-llvm-0006e1afddba3200687fa1f358436aff6dc25f76.tar.gz bcm5719-llvm-0006e1afddba3200687fa1f358436aff6dc25f76.zip | |
[Hexagon] Improve balancing of address calculation
Rebalances address calculation trees and applies Hexagon-specific
optimizations to the trees to improve instruction selection.
Patch by Tobias Edler von Koch.
llvm-svn: 277151
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 741 |
1 files changed, 738 insertions, 3 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 1b680358ecc..6497a3e9367 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -32,6 +32,24 @@ MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders", cl::desc("Maximum number of uses of a global address such that we still us a" "constant extended instruction")); +static +cl::opt<bool> +EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true), + cl::desc("Rebalance address calculation trees to improve " + "instruction selection")); + +// Rebalance only if this allows e.g. combining a GA with an offset or +// factoring out a shift. +static +cl::opt<bool> +RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false), + cl::desc("Rebalance address tree only if this allows optimizations")); + +static +cl::opt<bool> +RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, + cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced")); + //===----------------------------------------------------------------------===// // Instruction Selector Implementation //===----------------------------------------------------------------------===// @@ -42,14 +60,13 @@ MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders", /// namespace { class HexagonDAGToDAGISel : public SelectionDAGISel { - const HexagonTargetMachine &HTM; const HexagonSubtarget *HST; const HexagonInstrInfo *HII; const HexagonRegisterInfo *HRI; public: explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr), + : SelectionDAGISel(tm, OptLevel), HST(nullptr), HII(nullptr), HRI(nullptr) {} bool runOnMachineFunction(MachineFunction &MF) override { @@ -92,7 +109,6 @@ public: std::vector<SDValue> &OutOps) override; bool tryLoadOfLoadIntrinsic(LoadSDNode *N); void SelectLoad(SDNode *N); - void SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl); void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl); void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl); void SelectStore(SDNode *N); @@ -179,6 +195,17 @@ private: bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src); bool orIsAdd(const SDNode *N) const; bool isAlignedMemNode(const MemSDNode *N) const; + + SmallDenseMap<SDNode *,int> RootWeights; + SmallDenseMap<SDNode *,int> RootHeights; + SmallDenseMap<const Value *,int> GAUsesInFunction; + int getWeight(SDNode *N); + int getHeight(SDNode *N); + SDValue getMultiplierForSHL(SDNode *N); + SDValue factorOutPowerOf2(SDValue V, unsigned Power); + unsigned getUsesInFunction(const Value *V); + SDValue balanceSubTree(SDNode *N, bool Factorize = false); + void rebalanceAddressTrees(); }; // end HexagonDAGToDAGISel } // end anonymous namespace @@ -1373,6 +1400,16 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() { SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C); ReplaceNode(T0.getNode(), NewShl.getNode()); } + + if (EnableAddressRebalancing) { + rebalanceAddressTrees(); + + DEBUG( + dbgs() << "************* SelectionDAG after preprocessing: ***********\n"; + CurDAG->dump(); + dbgs() << "************* End SelectionDAG after preprocessing ********\n"; + ); + } } void HexagonDAGToDAGISel::EmitFunctionEntryCode() { @@ -1540,3 +1577,701 @@ bool HexagonDAGToDAGISel::orIsAdd(const SDNode *N) const { bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const { return N->getAlignment() >= N->getMemoryVT().getStoreSize(); } + +//////////////////////////////////////////////////////////////////////////////// +// Rebalancing of address calculation trees + +static bool isOpcodeHandled(const SDNode *N) { + switch (N->getOpcode()) { + case ISD::ADD: + case ISD::MUL: + return true; + case ISD::SHL: + // We only handle constant shifts because these can be easily flattened + // into multiplications by 2^Op1. + return isa<ConstantSDNode>(N->getOperand(1).getNode()); + default: + return false; + } +} + +/// \brief Return the weight of an SDNode +int HexagonDAGToDAGISel::getWeight(SDNode *N) { + if (!isOpcodeHandled(N)) + return 1; + assert(RootWeights.count(N) && "Cannot get weight of unseen root!"); + assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!"); + assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!"); + return RootWeights[N]; +} + +int HexagonDAGToDAGISel::getHeight(SDNode *N) { + if (!isOpcodeHandled(N)) + return 0; + assert(RootWeights.count(N) && RootWeights[N] >= 0 && + "Cannot query height of unvisited/RAUW'd node!"); + return RootHeights[N]; +} + +struct WeightedLeaf { + SDValue Value; + int Weight; + int InsertionOrder; + + WeightedLeaf() : Value(SDValue()) { } + + WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) : + Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) { + assert(Weight >= 0 && "Weight must be >= 0"); + } + + static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) { + assert(A.Value.getNode() && B.Value.getNode()); + return A.Weight == B.Weight ? + (A.InsertionOrder > B.InsertionOrder) : + (A.Weight > B.Weight); + } +}; + +/// A specialized priority queue for WeigthedLeaves. It automatically folds +/// constants and allows removal of non-top elements while maintaining the +/// priority order. +class LeafPrioQueue { + SmallVector<WeightedLeaf, 8> Q; + bool HaveConst; + WeightedLeaf ConstElt; + unsigned Opcode; + +public: + bool empty() { + return (!HaveConst && Q.empty()); + } + + size_t size() { + return Q.size() + HaveConst; + } + + bool hasConst() { + return HaveConst; + } + + const WeightedLeaf &top() { + if (HaveConst) + return ConstElt; + return Q.front(); + } + + WeightedLeaf pop() { + if (HaveConst) { + HaveConst = false; + return ConstElt; + } + std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); + return Q.pop_back_val(); + } + + void push(WeightedLeaf L, bool SeparateConst=true) { + if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) { + if (Opcode == ISD::MUL && + cast<ConstantSDNode>(L.Value)->getSExtValue() == 1) + return; + if (Opcode == ISD::ADD && + cast<ConstantSDNode>(L.Value)->getSExtValue() == 0) + return; + + HaveConst = true; + ConstElt = L; + } else { + Q.push_back(L); + std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); + } + } + + /// Push L to the bottom of the queue regardless of its weight. If L is + /// constant, it will not be folded with other constants in the queue. + void pushToBottom(WeightedLeaf L) { + L.Weight = 1000; + push(L, false); + } + + /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of + /// lowest weight and remove it from the queue. + WeightedLeaf findSHL(uint64_t MaxAmount); + + WeightedLeaf findMULbyConst(); + + LeafPrioQueue(unsigned Opcode) : + HaveConst(false), Opcode(Opcode) { } +}; + +WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) { + int ResultPos; + WeightedLeaf Result; + + for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) { + const WeightedLeaf &L = Q[Pos]; + const SDValue &Val = L.Value; + if (Val.getOpcode() != ISD::SHL || + !isa<ConstantSDNode>(Val.getOperand(1)) || + Val.getConstantOperandVal(1) > MaxAmount) + continue; + if (!Result.Value.getNode() || Result.Weight > L.Weight || + (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder)) + { + Result = L; + ResultPos = Pos; + } + } + + if (Result.Value.getNode()) { + Q.erase(&Q[ResultPos]); + std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); + } + + return Result; +} + +WeightedLeaf LeafPrioQueue::findMULbyConst() { + int ResultPos; + WeightedLeaf Result; + + for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) { + const WeightedLeaf &L = Q[Pos]; + const SDValue &Val = L.Value; + if (Val.getOpcode() != ISD::MUL || + !isa<ConstantSDNode>(Val.getOperand(1)) || + Val.getConstantOperandVal(1) > 127) + continue; + if (!Result.Value.getNode() || Result.Weight > L.Weight || + (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder)) + { + Result = L; + ResultPos = Pos; + } + } + + if (Result.Value.getNode()) { + Q.erase(&Q[ResultPos]); + std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); + } + + return Result; +} + +SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) { + uint64_t MulFactor = 1 << N->getConstantOperandVal(1); + return CurDAG->getConstant(MulFactor, SDLoc(N), + N->getOperand(1).getValueType()); +} + +/// @returns the value x for which 2^x is a factor of Val +static unsigned getPowerOf2Factor(SDValue Val) { + if (Val.getOpcode() == ISD::MUL) { + unsigned MaxFactor = 0; + for (int i=0; i < 2; ++i) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i)); + if (!C) + continue; + const APInt &CInt = C->getAPIntValue(); + if (CInt.getBoolValue()) + MaxFactor = CInt.countTrailingZeros(); + } + return MaxFactor; + } + if (Val.getOpcode() == ISD::SHL) { + if (!isa<ConstantSDNode>(Val.getOperand(1).getNode())) + return 0; + return (unsigned) Val.getConstantOperandVal(1); + } + + return 0; +} + +/// @returns true if V>>Amount will eliminate V's operation on its child +static bool willShiftRightEliminate(SDValue V, unsigned Amount) { + if (V.getOpcode() == ISD::MUL) { + SDValue Ops[] = { V.getOperand(0), V.getOperand(1) }; + for (int i=0; i < 2; ++i) + if (isa<ConstantSDNode>(Ops[i].getNode()) && + V.getConstantOperandVal(i) % ((uint64_t)1 << Amount) == 0) { + uint64_t NewConst = V.getConstantOperandVal(i) >> Amount; + return (NewConst == 1); + } + } else if (V.getOpcode() == ISD::SHL) { + return (Amount == V.getConstantOperandVal(1)); + } + + return false; +} + +SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) { + SDValue Ops[] = { V.getOperand(0), V.getOperand(1) }; + if (V.getOpcode() == ISD::MUL) { + for (int i=0; i < 2; ++i) { + if (isa<ConstantSDNode>(Ops[i].getNode()) && + V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) { + uint64_t NewConst = V.getConstantOperandVal(i) >> Power; + if (NewConst == 1) + return Ops[!i]; + Ops[i] = CurDAG->getConstant(NewConst, + SDLoc(V), V.getValueType()); + break; + } + } + } else if (V.getOpcode() == ISD::SHL) { + uint64_t ShiftAmount = V.getConstantOperandVal(1); + if (ShiftAmount == Power) + return Ops[0]; + Ops[1] = CurDAG->getConstant(ShiftAmount - Power, + SDLoc(V), V.getValueType()); + } + + return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops); +} + +static bool isTargetConstant(const SDValue &V) { + return V.getOpcode() == HexagonISD::CONST32 || + V.getOpcode() == HexagonISD::CONST32_GP; +} + +unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) { + if (GAUsesInFunction.count(V)) + return GAUsesInFunction[V]; + + unsigned Result = 0; + const Function *CurF = CurDAG->getMachineFunction().getFunction(); + for (const User *U : V->users()) { + if (isa<Instruction>(U) && + cast<Instruction>(U)->getParent()->getParent() == CurF) + ++Result; + } + + GAUsesInFunction[V] = Result; + + return Result; +} + +/// Note - After calling this, N may be dead. It may have been replaced by a +/// new node, so always use the returned value in place of N. +/// +/// @returns The SDValue taking the place of N (which could be N if it is +/// unchanged) +SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { + assert(RootWeights.count(N) && "Cannot balance non-root node."); + assert(RootWeights[N] != -2 && "This node was RAUW'd!"); + assert(!TopLevel || N->getOpcode() == ISD::ADD); + + // Return early if this node was already visited + if (RootWeights[N] != -1) + return SDValue(N, 0); + + assert(isOpcodeHandled(N)); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // Return early if the operands will remain unchanged or are all roots + if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) && + (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) { + SDNode *Op0N = Op0.getNode(); + int Weight; + if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) { + Weight = getWeight(balanceSubTree(Op0N).getNode()); + // Weight = calculateWeight(Op0N); + } else + Weight = getWeight(Op0N); + + SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd + if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) { + Weight += getWeight(balanceSubTree(Op1N).getNode()); + // Weight += calculateWeight(Op1N); + } else + Weight += getWeight(Op1N); + + RootWeights[N] = Weight; + RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()), + getHeight(N->getOperand(1).getNode())) + 1; + + DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight + << " Height=" << RootHeights[N] << "): "); + DEBUG(N->dump()); + + return SDValue(N, 0); + } + + DEBUG(dbgs() << "** Balancing root node: "); + DEBUG(N->dump()); + + unsigned NOpcode = N->getOpcode(); + + LeafPrioQueue Leaves(NOpcode); + SmallVector<SDValue, 4> Worklist; + Worklist.push_back(SDValue(N, 0)); + + // SHL nodes will be converted to MUL nodes + if (NOpcode == ISD::SHL) + NOpcode = ISD::MUL; + + bool CanFactorize = false; + WeightedLeaf Mul1, Mul2; + unsigned MaxPowerOf2 = 0; + WeightedLeaf GA; + + // Do not try to factor out a shift if there is already a shift at the tip of + // the tree. + bool HaveTopLevelShift = false; + if (TopLevel && + ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL && + Op0.getConstantOperandVal(1) < 4) || + (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL && + Op1.getConstantOperandVal(1) < 4))) + HaveTopLevelShift = true; + + // Flatten the subtree into an ordered list of leaves; at the same time + // determine whether the tree is already balanced. + int InsertionOrder = 0; + SmallDenseMap<SDValue, int> NodeHeights; + bool Imbalanced = false; + int CurrentWeight = 0; + while (!Worklist.empty()) { + SDValue Child = Worklist.pop_back_val(); + + if (Child.getNode() != N && RootWeights.count(Child.getNode())) { + // CASE 1: Child is a root note + + int Weight = RootWeights[Child.getNode()]; + if (Weight == -1) { + Child = balanceSubTree(Child.getNode()); + // calculateWeight(Child.getNode()); + Weight = getWeight(Child.getNode()); + } else if (Weight == -2) { + // Whoops, this node was RAUWd by one of the balanceSubTree calls we + // made. Our worklist isn't up to date anymore. + // Restart the whole process. + DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n"); + return balanceSubTree(N, TopLevel); + } + + NodeHeights[Child] = 1; + CurrentWeight += Weight; + + unsigned PowerOf2; + if (TopLevel && !CanFactorize && !HaveTopLevelShift && + (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) && + Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) { + // Try to identify two factorizable MUL/SHL children greedily. Leave + // them out of the priority queue for now so we can deal with them + // after. + if (!Mul1.Value.getNode()) { + Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++); + MaxPowerOf2 = PowerOf2; + } else { + Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++); + MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2); + + // Our addressing modes can only shift by a maximum of 3 + if (MaxPowerOf2 > 3) + MaxPowerOf2 = 3; + + CanFactorize = true; + } + } else + Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++)); + } else if (!isOpcodeHandled(Child.getNode())) { + // CASE 2: Child is an unhandled kind of node (e.g. constant) + int Weight = getWeight(Child.getNode()); + + NodeHeights[Child] = getHeight(Child.getNode()); + CurrentWeight += Weight; + + if (isTargetConstant(Child) && !GA.Value.getNode()) + GA = WeightedLeaf(Child, Weight, InsertionOrder++); + else + Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++)); + } else { + // CASE 3: Child is a subtree of same opcode + // Visit children first, then flatten. + unsigned ChildOpcode = Child.getOpcode(); + assert(ChildOpcode == NOpcode || + (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL)); + + // Convert SHL to MUL + SDValue Op1; + if (ChildOpcode == ISD::SHL) + Op1 = getMultiplierForSHL(Child.getNode()); + else + Op1 = Child->getOperand(1); + + if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) { + assert(!NodeHeights.count(Child) && "Parent visited before children?"); + // Visit children first, then re-visit this node + Worklist.push_back(Child); + Worklist.push_back(Op1); + Worklist.push_back(Child->getOperand(0)); + } else { + // Back at this node after visiting the children + if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1) + Imbalanced = true; + + NodeHeights[Child] = std::max(NodeHeights[Op1], + NodeHeights[Child->getOperand(0)]) + 1; + } + } + } + + DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)] + << " weight=" << CurrentWeight << " imbalanced=" + << Imbalanced << "\n"); + + // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y) + // This factors out a shift in order to match memw(a<<Y+b). + if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) || + willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) { + DEBUG(dbgs() << "--> Found common factor for two MUL children!\n"); + int Weight = Mul1.Weight + Mul2.Weight; + int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1; + SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2); + SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2); + SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(), + Mul1Factored, Mul2Factored); + SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N), + Mul1.Value.getValueType()); + SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(), + Sum, Const); + NodeHeights[New] = Height; + Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder)); + } else if (Mul1.Value.getNode()) { + // We failed to factorize two MULs, so now the Muls are left outside the + // queue... add them back. + Leaves.push(Mul1); + if (Mul2.Value.getNode()) + Leaves.push(Mul2); + CanFactorize = false; + } + + // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere + // and the root node itself is not used more than twice. This reduces the + // amount of additional constant extenders introduced by this optimization. + bool CombinedGA = false; + if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() && + GA.Value.hasOneUse() && N->use_size() < 3) { + GlobalAddressSDNode *GANode = + cast<GlobalAddressSDNode>(GA.Value.getOperand(0)); + ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value); + + if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() && + getTargetLowering()->isOffsetFoldingLegal(GANode)) { + DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue() + << "): "); + DEBUG(GANode->dump()); + + SDValue NewTGA = + CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value), + GANode->getValueType(0), + GANode->getOffset() + (uint64_t)Offset->getSExtValue()); + GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value), + GA.Value.getValueType(), NewTGA); + GA.Weight += Leaves.top().Weight; + + NodeHeights[GA.Value] = getHeight(GA.Value.getNode()); + CombinedGA = true; + + Leaves.pop(); // Remove the offset constant from the queue + } + } + + if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) || + (RebalanceOnlyImbalancedTrees && !Imbalanced)) { + RootWeights[N] = CurrentWeight; + RootHeights[N] = NodeHeights[SDValue(N, 0)]; + + return SDValue(N, 0); + } + + // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5)) + if (NOpcode == ISD::ADD && GA.Value.getNode()) { + WeightedLeaf SHL = Leaves.findSHL(31); + if (SHL.Value.getNode()) { + int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1; + GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value), + GA.Value.getValueType(), + GA.Value, SHL.Value); + GA.Weight = SHL.Weight; // Specifically ignore the GA weight here + NodeHeights[GA.Value] = Height; + } + } + + if (GA.Value.getNode()) + Leaves.push(GA); + + // If this is the top level and we haven't factored out a shift, we should try + // to move a constant to the bottom to match addressing modes like memw(rX+C) + if (TopLevel && !CanFactorize && Leaves.hasConst()) { + DEBUG(dbgs() << "--> Pushing constant to tip of tree."); + Leaves.pushToBottom(Leaves.pop()); + } + + // Rebuild the tree using Huffman's algorithm + while (Leaves.size() > 1) { + WeightedLeaf L0 = Leaves.pop(); + + // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)), + // otherwise just get the next leaf + WeightedLeaf L1 = Leaves.findMULbyConst(); + if (!L1.Value.getNode()) + L1 = Leaves.pop(); + + assert(L0.Weight <= L1.Weight && "Priority queue is broken!"); + + SDValue V0 = L0.Value; + int V0Weight = L0.Weight; + SDValue V1 = L1.Value; + int V1Weight = L1.Weight; + + // Make sure that none of these nodes have been RAUW'd + if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) || + (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) { + DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n"); + return balanceSubTree(N, TopLevel); + } + + ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0); + ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1); + EVT VT = N->getValueType(0); + SDValue NewNode; + + if (V0C && !V1C) { + std::swap(V0, V1); + std::swap(V0C, V1C); + } + + // Calculate height of this node + assert(NodeHeights.count(V0) && NodeHeights.count(V1) && + "Children must have been visited before re-combining them!"); + int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1; + + // Rebuild this node (and restore SHL from MUL if needed) + if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2()) + NewNode = CurDAG->getNode( + ISD::SHL, SDLoc(V0), VT, V0, + CurDAG->getConstant( + V1C->getAPIntValue().logBase2(), SDLoc(N), + getTargetLowering()->getScalarShiftAmountTy(CurDAG->getDataLayout(), V0.getValueType()))); + else + NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1); + + NodeHeights[NewNode] = Height; + + int Weight = V0Weight + V1Weight; + Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder)); + + DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height=" + << Height << "):\n"); + DEBUG(NewNode.dump()); + } + + assert(Leaves.size() == 1); + SDValue NewRoot = Leaves.top().Value; + + assert(NodeHeights.count(NewRoot)); + int Height = NodeHeights[NewRoot]; + + // Restore SHL if we earlier converted it to a MUL + if (NewRoot.getOpcode() == ISD::MUL) { + ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1)); + if (V1C && V1C->getAPIntValue().isPowerOf2()) { + EVT VT = NewRoot.getValueType(); + SDValue V0 = NewRoot.getOperand(0); + NewRoot = CurDAG->getNode( + ISD::SHL, SDLoc(NewRoot), VT, V0, + CurDAG->getConstant(V1C->getAPIntValue().logBase2(), SDLoc(NewRoot), + getTargetLowering()->getScalarShiftAmountTy( + CurDAG->getDataLayout(), V0.getValueType()))); + } + } + + if (N != NewRoot.getNode()) { + DEBUG(dbgs() << "--> Root is now: "); + DEBUG(NewRoot.dump()); + + // Replace all uses of old root by new root + CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode()); + // Mark that we have RAUW'd N + RootWeights[N] = -2; + } else { + DEBUG(dbgs() << "--> Root unchanged.\n"); + } + + RootWeights[NewRoot.getNode()] = Leaves.top().Weight; + RootHeights[NewRoot.getNode()] = Height; + + return NewRoot; +} + +void HexagonDAGToDAGISel::rebalanceAddressTrees() { + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E;) { + SDNode *N = &*I++; + if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE) + continue; + + SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr(); + if (BasePtr.getOpcode() != ISD::ADD) + continue; + + // We've already processed this node + if (RootWeights.count(BasePtr.getNode())) + continue; + + DEBUG(dbgs() << "** Rebalancing address calculation in node: "); + DEBUG(N->dump()); + + // FindRoots + SmallVector<SDNode *, 4> Worklist; + + Worklist.push_back(BasePtr.getOperand(0).getNode()); + Worklist.push_back(BasePtr.getOperand(1).getNode()); + + while (!Worklist.empty()) { + SDNode *N = Worklist.pop_back_val(); + unsigned Opcode = N->getOpcode(); + + if (!isOpcodeHandled(N)) + continue; + + Worklist.push_back(N->getOperand(0).getNode()); + Worklist.push_back(N->getOperand(1).getNode()); + + // Not a root if it has only one use and same opcode as its parent + if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode()) + continue; + + // This root node has already been processed + if (RootWeights.count(N)) + continue; + + RootWeights[N] = -1; + } + + // Balance node itself + RootWeights[BasePtr.getNode()] = -1; + SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true); + + if (N->getOpcode() == ISD::LOAD) + N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), + NewBasePtr, N->getOperand(2)); + else + N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), + NewBasePtr, N->getOperand(3)); + + DEBUG(dbgs() << "--> Final node: "); + DEBUG(N->dump()); + } + + CurDAG->RemoveDeadNodes(); + GAUsesInFunction.clear(); + RootHeights.clear(); + RootWeights.clear(); +} + + |

