summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2019-04-16 19:27:00 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2019-04-16 19:27:00 +0000
commit82ffa88a04a7cb961e482387ec25d6fd0c51ce20 (patch)
tree6abc1b4d1624b676804b567a6d56de40863eb846 /llvm/lib
parent3084db3bb1a0a7c0fe6dc400f8be9c2633be9063 (diff)
downloadbcm5719-llvm-82ffa88a04a7cb961e482387ec25d6fd0c51ce20.tar.gz
bcm5719-llvm-82ffa88a04a7cb961e482387ec25d6fd0c51ce20.zip
[SLP] Refactoring of the operand reordering code.
This is a refactoring patch which should have all the functionality of the current code. Its goal is twofold: i. Cleanup and simplify the reordering code, and ii. Generalize reordering so that it will work for an arbitrary number of operands, not just 2. This is the second patch in a series of patches that will enable operand reordering across chains of operations. An example of this was presented in EuroLLVM'18 https://www.youtube.com/watch?v=gIEn34LvyNo . Committed on behalf of @vporpo (Vasileios Porpodas) Differential Revision: https://reviews.llvm.org/D59973 llvm-svn: 358519
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp634
1 files changed, 463 insertions, 171 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2b5d4d81dd1..22c4dfdd751 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -634,6 +634,449 @@ public:
#endif
};
+ /// A helper data structure to hold the operands of a vector of instructions.
+ /// This supports a fixed vector length for all operand vectors.
+ class VLOperands {
+ /// For each operand we need (i) the value, and (ii) the opcode that it
+ /// would be attached to if the expression was in a left-linearized form.
+ /// This is required to avoid illegal operand reordering.
+ /// For example:
+ /// \verbatim
+ /// 0 Op1
+ /// |/
+ /// Op1 Op2 Linearized + Op2
+ /// \ / ----------> |/
+ /// - -
+ ///
+ /// Op1 - Op2 (0 + Op1) - Op2
+ /// \endverbatim
+ ///
+ /// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
+ ///
+ /// Another way to think of this is to track all the operations across the
+ /// path from the operand all the way to the root of the tree and to
+ /// calculate the operation that corresponds to this path. For example, the
+ /// path from Op2 to the root crosses the RHS of the '-', therefore the
+ /// corresponding operation is a '-' (which matches the one in the
+ /// linearized tree, as shown above).
+ ///
+ /// For lack of a better term, we refer to this operation as Accumulated
+ /// Path Operation (APO).
+ struct OperandData {
+ OperandData() = default;
+ OperandData(Value *V, bool APO, bool IsUsed)
+ : V(V), APO(APO), IsUsed(IsUsed) {}
+ /// The operand value.
+ Value *V = nullptr;
+ /// TreeEntries only allow a single opcode, or an alternate sequence of
+ /// them (e.g, +, -). Therefore, we can safely use a boolean value for the
+ /// APO. It is set to 'true' if 'V' is attached to an inverse operation
+ /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
+ /// (e.g., Add/Mul)
+ bool APO = false;
+ /// Helper data for the reordering function.
+ bool IsUsed = false;
+ };
+
+ /// During operand reordering, we are trying to select the operand at lane
+ /// that matches best with the operand at the neighboring lane. Our
+ /// selection is based on the type of value we are looking for. For example,
+ /// if the neighboring lane has a load, we need to look for a load that is
+ /// accessing a consecutive address. These strategies are summarized in the
+ /// 'ReorderingMode' enumerator.
+ enum class ReorderingMode {
+ Load, ///< Matching loads to consecutive memory addresses
+ Opcode, ///< Matching instructions based on opcode (same or alternate)
+ Constant, ///< Matching constants
+ Splat, ///< Matching the same instruction multiple times (broadcast)
+ Failed, ///< We failed to create a vectorizable group
+ };
+
+ using OperandDataVec = SmallVector<OperandData, 2>;
+
+ /// A vector of operand vectors.
+ SmallVector<OperandDataVec, 4> OpsVec;
+
+ const DataLayout &DL;
+ ScalarEvolution &SE;
+
+ /// \returns the operand data at \p OpIdx and \p Lane.
+ OperandData &getData(unsigned OpIdx, unsigned Lane) {
+ return OpsVec[OpIdx][Lane];
+ }
+
+ /// \returns the operand data at \p OpIdx and \p Lane. Const version.
+ const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
+ return OpsVec[OpIdx][Lane];
+ }
+
+ /// Clears the used flag for all entries.
+ void clearUsed() {
+ for (unsigned OpIdx = 0, NumOperands = getNumOperands();
+ OpIdx != NumOperands; ++OpIdx)
+ for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
+ ++Lane)
+ OpsVec[OpIdx][Lane].IsUsed = false;
+ }
+
+ /// Swap the operand at \p OpIdx1 with that one at \p OpIdx2.
+ void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
+ std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
+ }
+
+ // Search all operands in Ops[*][Lane] for the one that matches best
+ // Ops[OpIdx][LastLane] and return its opreand index.
+ // If no good match can be found, return None.
+ Optional<unsigned>
+ getBestOperand(unsigned OpIdx, int Lane, int LastLane,
+ ArrayRef<ReorderingMode> ReorderingModes) {
+ unsigned NumOperands = getNumOperands();
+
+ // The operand of the previous lane at OpIdx.
+ Value *OpLastLane = getData(OpIdx, LastLane).V;
+
+ // Our strategy mode for OpIdx.
+ ReorderingMode RMode = ReorderingModes[OpIdx];
+
+ // The linearized opcode of the operand at OpIdx, Lane.
+ bool OpIdxAPO = getData(OpIdx, Lane).APO;
+
+ const unsigned BestScore = 2;
+ const unsigned GoodScore = 1;
+
+ // The best operand index and its score.
+ // Sometimes we have more than one option (e.g., Opcode and Undefs), so we
+ // are using the score to differentiate between the two.
+ struct BestOpData {
+ Optional<unsigned> Idx = None;
+ unsigned Score = 0;
+ } BestOp;
+
+ // Iterate through all unused operands and look for the best.
+ for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
+ // Get the operand at Idx and Lane.
+ OperandData &OpData = getData(Idx, Lane);
+ Value *Op = OpData.V;
+ bool OpAPO = OpData.APO;
+
+ // Skip already selected operands.
+ if (OpData.IsUsed)
+ continue;
+
+ // Skip if we are trying to move the operand to a position with a
+ // different opcode in the linearized tree form. This would break the
+ // semantics.
+ if (OpAPO != OpIdxAPO)
+ continue;
+
+ // Look for an operand that matches the current mode.
+ switch (RMode) {
+ case ReorderingMode::Load:
+ if (isa<LoadInst>(Op)) {
+ // Figure out which is left and right, so that we can check for
+ // consecutive loads
+ bool LeftToRight = Lane > LastLane;
+ Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
+ Value *OpRight = (LeftToRight) ? Op : OpLastLane;
+ if (isConsecutiveAccess(cast<LoadInst>(OpLeft),
+ cast<LoadInst>(OpRight), DL, SE))
+ BestOp.Idx = Idx;
+ }
+ break;
+ case ReorderingMode::Opcode:
+ // We accept both Instructions and Undefs, but with different scores.
+ if ((isa<Instruction>(Op) &&
+ cast<Instruction>(Op)->getOpcode() ==
+ cast<Instruction>(OpLastLane)->getOpcode()) ||
+ isa<UndefValue>(Op)) {
+ // An instruction has a higher score than an undef.
+ unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
+ if (Score > BestOp.Score) {
+ BestOp.Idx = Idx;
+ BestOp.Score = Score;
+ }
+ }
+ break;
+ case ReorderingMode::Constant:
+ if (isa<Constant>(Op)) {
+ unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
+ if (Score > BestOp.Score) {
+ BestOp.Idx = Idx;
+ BestOp.Score = Score;
+ }
+ }
+ break;
+ case ReorderingMode::Splat:
+ if (Op == OpLastLane)
+ BestOp.Idx = Idx;
+ break;
+ case ReorderingMode::Failed:
+ return None;
+ }
+ }
+
+ if (BestOp.Idx) {
+ getData(BestOp.Idx.getValue(), Lane).IsUsed = true;
+ return BestOp.Idx;
+ }
+ // If we could not find a good match return None.
+ return None;
+ }
+
+ /// Helper for reorderOperandVecs. \Returns the lane that we should start
+ /// reordering from. This is the one which has the least number of operands
+ /// that can freely move about.
+ unsigned getBestLaneToStartReordering() const {
+ unsigned BestLane = 0;
+ unsigned Min = UINT_MAX;
+ for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
+ ++Lane) {
+ unsigned NumFreeOps = getMaxNumOperandsThatCanBeReordered(Lane);
+ if (NumFreeOps < Min) {
+ Min = NumFreeOps;
+ BestLane = Lane;
+ }
+ }
+ return BestLane;
+ }
+
+ /// \Returns the maximum number of operands that are allowed to be reordered
+ /// for \p Lane. This is used as a heuristic for selecting the first lane to
+ /// start operand reordering.
+ unsigned getMaxNumOperandsThatCanBeReordered(unsigned Lane) const {
+ unsigned CntTrue = 0;
+ unsigned NumOperands = getNumOperands();
+ // Operands with the same APO can be reordered. We therefore need to count
+ // how many of them we have for each APO, like this: Cnt[APO] = x.
+ // Since we only have two APOs, namely true and false, we can avoid using
+ // a map. Instead we can simply count the number of operands that
+ // correspond to one of them (in this case the 'true' APO), and calculate
+ // the other by subtracting it from the total number of operands.
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
+ if (getData(OpIdx, Lane).APO)
+ ++CntTrue;
+ unsigned CntFalse = NumOperands - CntTrue;
+ return std::max(CntTrue, CntFalse);
+ }
+
+ /// Go through the instructions in VL and append their operands.
+ void appendOperandsOfVL(ArrayRef<Value *> VL) {
+ assert(!VL.empty() && "Bad VL");
+ assert((empty() || VL.size() == getNumLanes()) &&
+ "Expected same number of lanes");
+ assert(isa<Instruction>(VL[0]) && "Expected instruction");
+ unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
+ OpsVec.resize(NumOperands);
+ unsigned NumLanes = VL.size();
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ OpsVec[OpIdx].resize(NumLanes);
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
+ // Our tree has just 3 nodes: the root and two operands.
+ // It is therefore trivial to get the APO. We only need to check the
+ // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
+ // RHS operand. The LHS operand of both add and sub is never attached
+ // to an inversese operation in the linearized form, therefore its APO
+ // is false. The RHS is ture only if VL[Lane] is an inverse operation.
+
+ // Since operand reordering is performed on groups of commutative
+ // operations or alternating sequences (e.g., +, -), we can safely
+ // tell the inverse operations by checking commutativity.
+ bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
+ bool APO = (OpIdx == 0) ? false : IsInverseOperation;
+ OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
+ APO, false};
+ }
+ }
+ }
+
+ /// \returns the number of operands.
+ unsigned getNumOperands() const { return OpsVec.size(); }
+
+ /// \returns the number of lanes.
+ unsigned getNumLanes() const { return OpsVec[0].size(); }
+
+ /// \returns the operand value at \p OpIdx and \p Lane.
+ Value *getValue(unsigned OpIdx, unsigned Lane) const {
+ return getData(OpIdx, Lane).V;
+ }
+
+ /// \returns true if the data structure is empty.
+ bool empty() const { return OpsVec.empty(); }
+
+ /// Clears the data.
+ void clear() { OpsVec.clear(); }
+
+ public:
+ /// Initialize with all the operands of the instruction vector \p RootVL.
+ VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
+ ScalarEvolution &SE)
+ : DL(DL), SE(SE) {
+ // Append all the operands of RootVL.
+ appendOperandsOfVL(RootVL);
+ }
+
+ /// \Returns a value vector with the operands across all lanes for the
+ /// opearnd at \p OpIdx.
+ ValueList getVL(unsigned OpIdx) const {
+ ValueList OpVL(OpsVec[OpIdx].size());
+ assert(OpsVec[OpIdx].size() == getNumLanes() &&
+ "Expected same num of lanes across all operands");
+ for (unsigned Lane = 0, Lanes = getNumLanes(); Lane != Lanes; ++Lane)
+ OpVL[Lane] = OpsVec[OpIdx][Lane].V;
+ return OpVL;
+ }
+
+ // Performs operand reordering for 2 or more operands.
+ // The original operands are in OrigOps[OpIdx][Lane].
+ // The reordered operands are returned in 'SortedOps[OpIdx][Lane]'.
+ void reorder() {
+ unsigned NumOperands = getNumOperands();
+ unsigned NumLanes = getNumLanes();
+ // Each operand has its own mode. We are using this mode to help us select
+ // the instructions for each lane, so that they match best with the ones
+ // we have selected so far.
+ SmallVector<ReorderingMode, 2> ReorderingModes(NumOperands);
+
+ // This is a greedy single-pass algorithm. We are going over each lane
+ // once and deciding on the best order right away with no back-tracking.
+ // However, in order to increase its effectiveness, we start with the lane
+ // that has operands that can move the least. For example, given the
+ // following lanes:
+ // Lane 0 : A[0] = B[0] + C[0] // Visited 3rd
+ // Lane 1 : A[1] = C[1] - B[1] // Visited 1st
+ // Lane 2 : A[2] = B[2] + C[2] // Visited 2nd
+ // Lane 3 : A[3] = C[3] - B[3] // Visited 4th
+ // we will start at Lane 1, since the operands of the subtraction cannot
+ // be reordered. Then we will visit the rest of the lanes in a circular
+ // fashion. That is, Lanes 2, then Lane 0, and finally Lane 3.
+
+ // Find the first lane that we will start our search from.
+ unsigned FirstLane = getBestLaneToStartReordering();
+
+ // Initialize the modes.
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ Value *OpLane0 = getValue(OpIdx, FirstLane);
+ // Keep track if we have instructions with all the same opcode on one
+ // side.
+ if (isa<LoadInst>(OpLane0))
+ ReorderingModes[OpIdx] = ReorderingMode::Load;
+ else if (isa<Instruction>(OpLane0))
+ ReorderingModes[OpIdx] = ReorderingMode::Opcode;
+ else if (isa<Constant>(OpLane0))
+ ReorderingModes[OpIdx] = ReorderingMode::Constant;
+ else if (isa<Argument>(OpLane0))
+ // Our best hope is a Splat. It may save some cost in some cases.
+ ReorderingModes[OpIdx] = ReorderingMode::Splat;
+ else
+ // NOTE: This should be unreachable.
+ ReorderingModes[OpIdx] = ReorderingMode::Failed;
+ }
+
+ // If the initial strategy fails for any of the operand indexes, then we
+ // perform reordering again in a second pass. This helps avoid assigning
+ // high priority to the failed strategy, and should improve reordering for
+ // the non-failed operand indexes.
+ for (int Pass = 0; Pass != 2; ++Pass) {
+ // Skip the second pass if the first pass did not fail.
+ bool StrategyFailed = false;
+ // Mark the operand data as free to use for all but the first pass.
+ if (Pass > 0)
+ clearUsed();
+ // We keep the original operand order for the FirstLane, so reorder the
+ // rest of the lanes. We are visiting the nodes in a circular fashion,
+ // using FirstLane as the center point and increasing the radius
+ // distance.
+ for (unsigned Distance = 1; Distance != NumLanes; ++Distance) {
+ // Visit the lane on the right and then the lane on the left.
+ for (int Direction : {+1, -1}) {
+ int Lane = FirstLane + Direction * Distance;
+ if (Lane < 0 || Lane >= (int)NumLanes)
+ continue;
+ int LastLane = Lane - Direction;
+ assert(LastLane >= 0 && LastLane < (int)NumLanes &&
+ "Out of bounds");
+ // Look for a good match for each operand.
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ // Search for the operand that matches SortedOps[OpIdx][Lane-1].
+ Optional<unsigned> BestIdx =
+ getBestOperand(OpIdx, Lane, LastLane, ReorderingModes);
+ // By not selecting a value, we allow the operands that follow to
+ // select a better matching value. We will get a non-null value in
+ // the next run of getBestOperand().
+ if (BestIdx) {
+ // Swap the current operand with the one returned by
+ // getBestOperand().
+ swap(OpIdx, BestIdx.getValue(), Lane);
+ } else {
+ // We failed to find a best operand, set mode to 'Failed'.
+ ReorderingModes[OpIdx] = ReorderingMode::Failed;
+ // Enable the second pass.
+ StrategyFailed = true;
+ }
+ }
+ }
+ }
+ // Skip second pass if the strategy did not fail.
+ if (!StrategyFailed)
+ break;
+ }
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD static StringRef getModeStr(ReorderingMode RMode) {
+ switch (RMode) {
+ case ReorderingMode::Load:
+ return "Load";
+ case ReorderingMode::Opcode:
+ return "Opcode";
+ case ReorderingMode::Constant:
+ return "Constant";
+ case ReorderingMode::Splat:
+ return "Splat";
+ case ReorderingMode::Failed:
+ return "Failed";
+ }
+ llvm_unreachable("Unimplemented Reordering Type");
+ }
+
+ LLVM_DUMP_METHOD static raw_ostream &printMode(ReorderingMode RMode,
+ raw_ostream &OS) {
+ return OS << getModeStr(RMode);
+ }
+
+ /// Debug print.
+ LLVM_DUMP_METHOD static void dumpMode(ReorderingMode RMode) {
+ printMode(RMode, dbgs());
+ }
+
+ friend raw_ostream &operator<<(raw_ostream &OS, ReorderingMode RMode) {
+ return printMode(RMode, OS);
+ }
+
+ LLVM_DUMP_METHOD raw_ostream &print(raw_ostream &OS) const {
+ const unsigned Indent = 2;
+ unsigned Cnt = 0;
+ for (const OperandDataVec &OpDataVec : OpsVec) {
+ OS << "Operand " << Cnt++ << "\n";
+ for (const OperandData &OpData : OpDataVec) {
+ OS.indent(Indent) << "{";
+ if (Value *V = OpData.V)
+ OS << *V;
+ else
+ OS << "null";
+ OS << ", APO:" << OpData.APO << "}\n";
+ }
+ OS << "\n";
+ }
+ return OS;
+ }
+
+ /// Debug print.
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
+ };
+
private:
struct TreeEntry;
@@ -681,12 +1124,13 @@ private:
/// be beneficial even the tree height is tiny.
bool isFullyVectorizableTinyTree() const;
- /// \reorder commutative operands to get better probability of
+ /// Reorder commutative or alt operands to get better probability of
/// generating vectorized code.
- void reorderInputsAccordingToOpcode(const InstructionsState &S,
- ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right) const;
+ static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right,
+ const DataLayout &DL,
+ ScalarEvolution &SE);
struct TreeEntry {
TreeEntry(std::vector<TreeEntry> &Container) : Container(Container) {}
@@ -1866,7 +2310,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
assert(P0 == SwapP0 && "Commutative Predicate mismatch");
- reorderInputsAccordingToOpcode(S, VL, Left, Right);
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
} else {
// Collect operands - commute if it uses the swapped predicate.
for (Value *V : VL) {
@@ -1912,7 +2356,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// have the same opcode.
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
- reorderInputsAccordingToOpcode(S, VL, Left, Right);
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
UserTreeIdx.EdgeIdx = 0;
buildTree_rec(Left, Depth + 1, UserTreeIdx);
UserTreeIdx.EdgeIdx = 1;
@@ -2087,7 +2531,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
- reorderInputsAccordingToOpcode(S, VL, Left, Right);
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
UserTreeIdx.EdgeIdx = 0;
buildTree_rec(Left, Depth + 1, UserTreeIdx);
UserTreeIdx.EdgeIdx = 1;
@@ -2802,171 +3246,19 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
return getGatherCost(VecTy, ShuffledElements);
}
-// Return true if the i'th left and right operands can be commuted.
-//
-// The vectorizer is trying to either have all elements one side being
-// instruction with the same opcode to enable further vectorization, or having
-// a splat to lower the vectorizing cost.
-static bool shouldReorderOperands(int i, ArrayRef<Value *> Left,
- ArrayRef<Value *> Right,
- bool AllSameOpcodeLeft,
- bool AllSameOpcodeRight, bool SplatLeft,
- bool SplatRight) {
- Value *PrevLeft = Left[i - 1];
- Value *PrevRight = Right[i - 1];
- Value *CurrLeft = Left[i];
- Value *CurrRight = Right[i];
-
- // If we have "SplatRight", try to see if commuting is needed to preserve it.
- if (SplatRight) {
- if (CurrRight == PrevRight)
- // Preserve SplatRight
- return false;
- if (CurrLeft == PrevRight) {
- // Commuting would preserve SplatRight, but we don't want to break
- // SplatLeft either, i.e. preserve the original order if possible.
- // (FIXME: why do we care?)
- if (SplatLeft && CurrLeft == PrevLeft)
- return false;
- return true;
- }
- }
- // Symmetrically handle Right side.
- if (SplatLeft) {
- if (CurrLeft == PrevLeft)
- // Preserve SplatLeft
- return false;
- if (CurrRight == PrevLeft)
- return true;
- }
-
- Instruction *ILeft = dyn_cast<Instruction>(CurrLeft);
- Instruction *IRight = dyn_cast<Instruction>(CurrRight);
-
- // If we have "AllSameOpcodeRight", try to see if the left operands preserves
- // it and not the right, in this case we want to commute.
- if (AllSameOpcodeRight) {
- unsigned RightPrevOpcode = cast<Instruction>(PrevRight)->getOpcode();
- if (IRight && RightPrevOpcode == IRight->getOpcode())
- // Do not commute, a match on the right preserves AllSameOpcodeRight
- return false;
- if (ILeft && RightPrevOpcode == ILeft->getOpcode()) {
- // We have a match and may want to commute, but first check if there is
- // not also a match on the existing operands on the Left to preserve
- // AllSameOpcodeLeft, i.e. preserve the original order if possible.
- // (FIXME: why do we care?)
- if (AllSameOpcodeLeft && ILeft &&
- cast<Instruction>(PrevLeft)->getOpcode() == ILeft->getOpcode())
- return false;
- return true;
- }
- }
- // Symmetrically handle Left side.
- if (AllSameOpcodeLeft) {
- unsigned LeftPrevOpcode = cast<Instruction>(PrevLeft)->getOpcode();
- if (ILeft && LeftPrevOpcode == ILeft->getOpcode())
- return false;
- if (IRight && LeftPrevOpcode == IRight->getOpcode())
- return true;
- }
- return false;
-}
-
+// Perform operand reordering on the instructions in VL and return the reordered
+// operands in Left and Right.
void BoUpSLP::reorderInputsAccordingToOpcode(
- const InstructionsState &S, ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left, SmallVectorImpl<Value *> &Right) const {
- assert(!VL.empty() && Left.empty() && Right.empty() &&
- "Unexpected instruction/operand lists");
-
- // Push left and right operands of binary operation into Left and Right
- for (Value *V : VL) {
- auto *I = cast<Instruction>(V);
- assert(S.isOpcodeOrAlt(I) && "Incorrect instruction in vector");
- Left.push_back(I->getOperand(0));
- Right.push_back(I->getOperand(1));
- }
-
- // Keep track if we have instructions with all the same opcode on one side.
- bool AllSameOpcodeLeft = isa<Instruction>(Left[0]);
- bool AllSameOpcodeRight = isa<Instruction>(Right[0]);
- // Keep track if we have one side with all the same value (broadcast).
- bool SplatLeft = true;
- bool SplatRight = true;
-
- for (unsigned i = 1, e = VL.size(); i != e; ++i) {
- Instruction *I = cast<Instruction>(VL[i]);
- // Commute to favor either a splat or maximizing having the same opcodes on
- // one side.
- if (isCommutative(I) &&
- shouldReorderOperands(i, Left, Right, AllSameOpcodeLeft,
- AllSameOpcodeRight, SplatLeft, SplatRight))
- std::swap(Left[i], Right[i]);
-
- // Update Splat* and AllSameOpcode* after the insertion.
- SplatRight = SplatRight && (Right[i - 1] == Right[i]);
- SplatLeft = SplatLeft && (Left[i - 1] == Left[i]);
- AllSameOpcodeLeft = AllSameOpcodeLeft && isa<Instruction>(Left[i]) &&
- (cast<Instruction>(Left[i - 1])->getOpcode() ==
- cast<Instruction>(Left[i])->getOpcode());
- AllSameOpcodeRight = AllSameOpcodeRight && isa<Instruction>(Right[i]) &&
- (cast<Instruction>(Right[i - 1])->getOpcode() ==
- cast<Instruction>(Right[i])->getOpcode());
- }
-
- // If one operand end up being broadcast, return this operand order.
- if (SplatRight || SplatLeft)
+ ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right, const DataLayout &DL,
+ ScalarEvolution &SE) {
+ if (VL.empty())
return;
-
- // Finally check if we can get longer vectorizable chain by reordering
- // without breaking the good operand order detected above.
- // E.g. If we have something like-
- // load a[0] - load b[0]
- // load b[1] + load a[1]
- // load a[2] - load b[2]
- // load a[3] + load b[3]
- // Reordering the second load b[1] + load a[1] would allow us to vectorize
- // this code and we still retain AllSameOpcode property.
- // FIXME: This load reordering might break AllSameOpcode in some rare cases
- // such as-
- // add a[0],c[0] load b[0]
- // add a[1],c[2] load b[1]
- // b[2] load b[2]
- // add a[3],c[3] load b[3]
- for (unsigned j = 0, e = VL.size() - 1; j < e; ++j) {
- if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
- if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
- if (isConsecutiveAccess(L, L1, *DL, *SE)) {
- auto *VL1 = cast<Instruction>(VL[j]);
- auto *VL2 = cast<Instruction>(VL[j + 1]);
- if (isCommutative(VL2)) {
- std::swap(Left[j + 1], Right[j + 1]);
- continue;
- }
- if (isCommutative(VL1)) {
- std::swap(Left[j], Right[j]);
- continue;
- }
- }
- }
- }
- if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
- if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
- if (isConsecutiveAccess(L, L1, *DL, *SE)) {
- auto *VL1 = cast<Instruction>(VL[j]);
- auto *VL2 = cast<Instruction>(VL[j + 1]);
- if (isCommutative(VL2)) {
- std::swap(Left[j + 1], Right[j + 1]);
- continue;
- }
- if (isCommutative(VL1)) {
- std::swap(Left[j], Right[j]);
- continue;
- }
- }
- }
- }
- // else unchanged
- }
+ VLOperands Ops(VL, DL, SE);
+ // Reorder the operands in place.
+ Ops.reorder();
+ Left = Ops.getVL(0);
+ Right = Ops.getVL(1);
}
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
OpenPOWER on IntegriCloud