diff options
| author | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2017-12-06 16:40:37 +0000 |
|---|---|---|
| committer | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2017-12-06 16:40:37 +0000 |
| commit | 7d37dd8902e8ef8583cdbbe906eda8435426c996 (patch) | |
| tree | 1c8e0961ebc4986c6a8c6c510a4d4fc38b38a37d /llvm/lib/Target/Hexagon | |
| parent | aa902be15897c3fc1486481648dc3958e7aeba81 (diff) | |
| download | bcm5719-llvm-7d37dd8902e8ef8583cdbbe906eda8435426c996.tar.gz bcm5719-llvm-7d37dd8902e8ef8583cdbbe906eda8435426c996.zip | |
[Hexagon] Generate HVX code for vector construction and access
Support for:
- build vector,
- extract vector element, subvector,
- insert vector element, subvector,
- shuffle.
llvm-svn: 319901
Diffstat (limited to 'llvm/lib/Target/Hexagon')
| -rw-r--r-- | llvm/lib/Target/Hexagon/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h | 17 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 1924 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 313 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLowering.h | 57 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 299 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonPatterns.td | 96 |
8 files changed, 2468 insertions, 248 deletions
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index ac6a5fcd081..9f30f2b7cc3 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -35,7 +35,9 @@ add_llvm_target(HexagonCodeGen HexagonHazardRecognizer.cpp HexagonInstrInfo.cpp HexagonISelDAGToDAG.cpp + HexagonISelDAGToDAGHVX.cpp HexagonISelLowering.cpp + HexagonISelLoweringHVX.cpp HexagonLoopIdiomRecognition.cpp HexagonMachineFunctionInfo.cpp HexagonMachineScheduler.cpp diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 2551fe5a140..d0cd143a4d4 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -754,7 +754,6 @@ void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) { CurDAG->RemoveDeadNode(N); } - void HexagonDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) return N->setNodeId(-1); // Already selected. @@ -772,6 +771,13 @@ void HexagonDAGToDAGISel::Select(SDNode *N) { case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N); } + if (HST->useHVXOps()) { + switch (N->getOpcode()) { + case ISD::VECTOR_SHUFFLE: return SelectHvxShuffle(N); + case HexagonISD::VROR: return SelectHvxRor(N); + } + } + SelectCode(N); } diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h index 4a7f4b79f8f..e3e22a39fc1 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h @@ -26,6 +26,7 @@ namespace llvm { class MachineFunction; class HexagonInstrInfo; class HexagonRegisterInfo; +class HexagonTargetLowering; class HexagonDAGToDAGISel : public SelectionDAGISel { const HexagonSubtarget *HST; @@ -100,13 +101,25 @@ public: void SelectConstant(SDNode *N); void SelectConstantFP(SDNode *N); void SelectBitcast(SDNode *N); - void SelectVectorShuffle(SDNode *N); - // Include the pieces autogenerated from the 
target description. + // Include the declarations autogenerated from the selection patterns. #define GET_DAGISEL_DECL #include "HexagonGenDAGISel.inc" private: + // This is really only to get access to ReplaceNode (which is a protected + // member). Any other members used by HvxSelector can be moved around to + // make them accessible). + friend struct HvxSelector; + + SDValue selectUndef(const SDLoc &dl, MVT ResTy) { + SDNode *U = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy); + return SDValue(U, 0); + } + + void SelectHvxShuffle(SDNode *N); + void SelectHvxRor(SDNode *N); + bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src); bool isOrEquivalentToAdd(const SDNode *N) const; bool isAlignedMemNode(const MemSDNode *N) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp new file mode 100644 index 00000000000..a636e4e1557 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -0,0 +1,1924 @@ +//===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonISelDAGToDAG.h" +#include "HexagonISelLowering.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +#include <deque> +#include <map> +#include <set> +#include <utility> +#include <vector> + +#define DEBUG_TYPE "hexagon-isel" + +using namespace llvm; + +// -------------------------------------------------------------------- +// Implementation of permutation networks. 
+ +// Implementation of the node routing through butterfly networks: +// - Forward delta. +// - Reverse delta. +// - Benes. +// +// +// Forward delta network consists of log(N) steps, where N is the number +// of inputs. In each step, an input can stay in place, or it can get +// routed to another position[1]. The step after that consists of two +// networks, each half in size in terms of the number of nodes. In those +// terms, in the given step, an input can go to either the upper or the +// lower network in the next step. +// +// [1] Hexagon's vdelta/vrdelta allow an element to be routed to both +// positions as long as there is no conflict. + +// Here's a delta network for 8 inputs, only the switching routes are +// shown: +// +// Steps: +// |- 1 ---------------|- 2 -----|- 3 -| +// +// Inp[0] *** *** *** *** Out[0] +// \ / \ / \ / +// \ / \ / X +// \ / \ / / \ +// Inp[1] *** \ / *** X *** *** Out[1] +// \ \ / / \ / \ / +// \ \ / / X X +// \ \ / / / \ / \ +// Inp[2] *** \ \ / / *** X *** *** Out[2] +// \ \ X / / / \ \ / +// \ \ / \ / / / \ X +// \ X X / / \ / \ +// Inp[3] *** \ / \ / \ / *** *** *** Out[3] +// \ X X X / +// \ / \ / \ / \ / +// X X X X +// / \ / \ / \ / \ +// / X X X \ +// Inp[4] *** / \ / \ / \ *** *** *** Out[4] +// / X X \ \ / \ / +// / / \ / \ \ \ / X +// / / X \ \ \ / / \ +// Inp[5] *** / / \ \ *** X *** *** Out[5] +// / / \ \ \ / \ / +// / / \ \ X X +// / / \ \ / \ / \ +// Inp[6] *** / \ *** X *** *** Out[6] +// / \ / \ \ / +// / \ / \ X +// / \ / \ / \ +// Inp[7] *** *** *** *** Out[7] +// +// +// Reverse delta network is same as delta network, with the steps in +// the opposite order. +// +// +// Benes network is a forward delta network immediately followed by +// a reverse delta network. + + +// Graph coloring utility used to partition nodes into two groups: +// they will correspond to nodes routed to the upper and lower networks. 
+struct Coloring { + enum : uint8_t { + None = 0, + Red, + Black + }; + + using Node = int; + using MapType = std::map<Node,uint8_t>; + static constexpr Node Ignore = Node(-1); + + Coloring(ArrayRef<Node> Ord) : Order(Ord) { + build(); + if (!color()) + Colors.clear(); + } + + const MapType &colors() const { + return Colors; + } + + uint8_t other(uint8_t Color) { + if (Color == None) + return Red; + return Color == Red ? Black : Red; + } + + void dump() const; + +private: + ArrayRef<Node> Order; + MapType Colors; + std::set<Node> Needed; + + using NodeSet = std::set<Node>; + std::map<Node,NodeSet> Edges; + + Node conj(Node Pos) { + Node Num = Order.size(); + return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2; + } + + uint8_t getColor(Node N) { + auto F = Colors.find(N); + return F != Colors.end() ? F->second : None; + } + + std::pair<bool,uint8_t> getUniqueColor(const NodeSet &Nodes); + + void build(); + bool color(); +}; + +std::pair<bool,uint8_t> Coloring::getUniqueColor(const NodeSet &Nodes) { + uint8_t Color = None; + for (Node N : Nodes) { + uint8_t ColorN = getColor(N); + if (ColorN == None) + continue; + if (Color == None) + Color = ColorN; + else if (Color != None && Color != ColorN) + return { false, None }; + } + return { true, Color }; +} + +void Coloring::build() { + // Add Order[P] and Order[conj(P)] to Edges. + for (unsigned P = 0; P != Order.size(); ++P) { + Node I = Order[P]; + if (I != Ignore) { + Needed.insert(I); + Node PC = Order[conj(P)]; + if (PC != Ignore && PC != I) + Edges[I].insert(PC); + } + } + // Add I and conj(I) to Edges. + for (unsigned I = 0; I != Order.size(); ++I) { + if (!Needed.count(I)) + continue; + Node C = conj(I); + // This will create an entry in the edge table, even if I is not + // connected to any other node. This is necessary, because it still + // needs to be colored. 
+ NodeSet &Is = Edges[I]; + if (Needed.count(C)) + Is.insert(C); + } +} + +bool Coloring::color() { + SetVector<Node> FirstQ; + auto Enqueue = [this,&FirstQ] (Node N) { + SetVector<Node> Q; + Q.insert(N); + for (unsigned I = 0; I != Q.size(); ++I) { + NodeSet &Ns = Edges[Q[I]]; + Q.insert(Ns.begin(), Ns.end()); + } + FirstQ.insert(Q.begin(), Q.end()); + }; + for (Node N : Needed) + Enqueue(N); + + for (Node N : FirstQ) { + if (Colors.count(N)) + continue; + NodeSet &Ns = Edges[N]; + auto P = getUniqueColor(Ns); + if (!P.first) + return false; + Colors[N] = other(P.second); + } + + // First, color nodes that don't have any dups. + for (auto E : Edges) { + Node N = E.first; + if (!Needed.count(conj(N)) || Colors.count(N)) + continue; + auto P = getUniqueColor(E.second); + if (!P.first) + return false; + Colors[N] = other(P.second); + } + + // Now, nodes that are still uncolored. Since the graph can be modified + // in this step, create a work queue. + std::vector<Node> WorkQ; + for (auto E : Edges) { + Node N = E.first; + if (!Colors.count(N)) + WorkQ.push_back(N); + } + + for (unsigned I = 0; I < WorkQ.size(); ++I) { + Node N = WorkQ[I]; + NodeSet &Ns = Edges[N]; + auto P = getUniqueColor(Ns); + if (P.first) { + Colors[N] = other(P.second); + continue; + } + + // Coloring failed. Split this node. + Node C = conj(N); + uint8_t ColorN = other(None); + uint8_t ColorC = other(ColorN); + NodeSet &Cs = Edges[C]; + NodeSet CopyNs = Ns; + for (Node M : CopyNs) { + uint8_t ColorM = getColor(M); + if (ColorM == ColorC) { + // Connect M with C, disconnect M from N. + Cs.insert(M); + Edges[M].insert(C); + Ns.erase(M); + Edges[M].erase(N); + } + } + Colors[N] = ColorN; + Colors[C] = ColorC; + } + + // Explicitly assign "None" all all uncolored nodes. 
+ for (unsigned I = 0; I != Order.size(); ++I) + if (Colors.count(I) == 0) + Colors[I] = None; + + return true; +} + +LLVM_DUMP_METHOD +void Coloring::dump() const { + dbgs() << "{ Order: {"; + for (unsigned I = 0; I != Order.size(); ++I) { + Node P = Order[I]; + if (P != Ignore) + dbgs() << ' ' << P; + else + dbgs() << " -"; + } + dbgs() << " }\n"; + dbgs() << " Needed: {"; + for (Node N : Needed) + dbgs() << ' ' << N; + dbgs() << " }\n"; + + dbgs() << " Edges: {\n"; + for (auto E : Edges) { + dbgs() << " " << E.first << " -> {"; + for (auto N : E.second) + dbgs() << ' ' << N; + dbgs() << " }\n"; + } + dbgs() << " }\n"; + + static const char *const Names[] = { "None", "Red", "Black" }; + dbgs() << " Colors: {\n"; + for (auto C : Colors) + dbgs() << " " << C.first << " -> " << Names[C.second] << "\n"; + dbgs() << " }\n}\n"; +} + +// Base class of for reordering networks. They don't strictly need to be +// permutations, as outputs with repeated occurrences of an input element +// are allowed. 
+struct PermNetwork { + using Controls = std::vector<uint8_t>; + using ElemType = int; + static constexpr ElemType Ignore = ElemType(-1); + + enum : uint8_t { + None, + Pass, + Switch + }; + enum : uint8_t { + Forward, + Reverse + }; + + PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) { + Order.assign(Ord.data(), Ord.data()+Ord.size()); + Log = 0; + + unsigned S = Order.size(); + while (S >>= 1) + ++Log; + + Table.resize(Order.size()); + for (RowType &Row : Table) + Row.resize(Mult*Log, None); + } + + void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const { + unsigned Size = Order.size(); + V.resize(Size); + for (unsigned I = 0; I != Size; ++I) { + unsigned W = 0; + for (unsigned L = 0; L != Log; ++L) { + unsigned C = ctl(I, StartAt+L) == Switch; + if (Dir == Forward) + W |= C << (Log-1-L); + else + W |= C << L; + } + assert(isUInt<8>(W)); + V[I] = uint8_t(W); + } + } + + uint8_t ctl(ElemType Pos, unsigned Step) const { + return Table[Pos][Step]; + } + unsigned size() const { + return Order.size(); + } + unsigned steps() const { + return Log; + } + +protected: + unsigned Log; + std::vector<ElemType> Order; + using RowType = std::vector<uint8_t>; + std::vector<RowType> Table; +}; + +struct ForwardDeltaNetwork : public PermNetwork { + ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {} + + bool run(Controls &V) { + if (!route(Order.data(), Table.data(), size(), 0)) + return false; + getControls(V, 0, Forward); + return true; + } + +private: + bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step); +}; + +struct ReverseDeltaNetwork : public PermNetwork { + ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {} + + bool run(Controls &V) { + if (!route(Order.data(), Table.data(), size(), 0)) + return false; + getControls(V, 0, Reverse); + return true; + } + +private: + bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step); +}; + +struct BenesNetwork : public PermNetwork { + 
BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {} + + bool run(Controls &F, Controls &R) { + if (!route(Order.data(), Table.data(), size(), 0)) + return false; + + getControls(F, 0, Forward); + getControls(R, Log, Reverse); + return true; + } + +private: + bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step); +}; + + +bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size, + unsigned Step) { + bool UseUp = false, UseDown = false; + ElemType Num = Size; + + // Cannot use coloring here, because coloring is used to determine + // the "big" switch, i.e. the one that changes halves, and in a forward + // network, a color can be simultaneously routed to both halves in the + // step we're working on. + for (ElemType J = 0; J != Num; ++J) { + ElemType I = P[J]; + // I is the position in the input, + // J is the position in the output. + if (I == Ignore) + continue; + uint8_t S; + if (I < Num/2) + S = (J < Num/2) ? Pass : Switch; + else + S = (J < Num/2) ? Switch : Pass; + + // U is the element in the table that needs to be updated. + ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2); + if (U < Num/2) + UseUp = true; + else + UseDown = true; + if (T[U][Step] != S && T[U][Step] != None) + return false; + T[U][Step] = S; + } + + for (ElemType J = 0; J != Num; ++J) + if (P[J] != Ignore && P[J] >= Num/2) + P[J] -= Num/2; + + if (Step+1 < Log) { + if (UseUp && !route(P, T, Size/2, Step+1)) + return false; + if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) + return false; + } + return true; +} + +bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size, + unsigned Step) { + unsigned Pets = Log-1 - Step; + bool UseUp = false, UseDown = false; + ElemType Num = Size; + + // In this step half-switching occurs, so coloring can be used. 
+ Coloring G({P,Size}); + const Coloring::MapType &M = G.colors(); + if (M.empty()) + return false; + + uint8_t ColorUp = Coloring::None; + for (ElemType J = 0; J != Num; ++J) { + ElemType I = P[J]; + // I is the position in the input, + // J is the position in the output. + if (I == Ignore) + continue; + uint8_t C = M.at(I); + if (C == Coloring::None) + continue; + // During "Step", inputs cannot switch halves, so if the "up" color + // is still unknown, make sure that it is selected in such a way that + // "I" will stay in the same half. + bool InpUp = I < Num/2; + if (ColorUp == Coloring::None) + ColorUp = InpUp ? C : G.other(C); + if ((C == ColorUp) != InpUp) { + // If I should go to a different half than where is it now, give up. + return false; + } + + uint8_t S; + if (InpUp) { + S = (J < Num/2) ? Pass : Switch; + UseUp = true; + } else { + S = (J < Num/2) ? Switch : Pass; + UseDown = true; + } + T[J][Pets] = S; + } + + // Reorder the working permutation according to the computed switch table + // for the last step (i.e. Pets). + for (ElemType J = 0; J != Size/2; ++J) { + ElemType PJ = P[J]; // Current values of P[J] + ElemType PC = P[J+Size/2]; // and P[conj(J)] + ElemType QJ = PJ; // New values of P[J] + ElemType QC = PC; // and P[conj(J)] + if (T[J][Pets] == Switch) + QC = PJ; + if (T[J+Size/2][Pets] == Switch) + QJ = PC; + P[J] = QJ; + P[J+Size/2] = QC; + } + + for (ElemType J = 0; J != Num; ++J) + if (P[J] != Ignore && P[J] >= Num/2) + P[J] -= Num/2; + + if (Step+1 < Log) { + if (UseUp && !route(P, T, Size/2, Step+1)) + return false; + if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) + return false; + } + return true; +} + +bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size, + unsigned Step) { + Coloring G({P,Size}); + const Coloring::MapType &M = G.colors(); + if (M.empty()) + return false; + ElemType Num = Size; + + unsigned Pets = 2*Log-1 - Step; + bool UseUp = false, UseDown = false; + + // Both assignments, i.e. 
Red->Up and Red->Down are valid, but they will + // result in different controls. Let's pick the one where the first + // control will be "Pass". + uint8_t ColorUp = Coloring::None; + for (ElemType J = 0; J != Num; ++J) { + ElemType I = P[J]; + if (I == Ignore) + continue; + uint8_t C = M.at(I); + if (C == Coloring::None) + continue; + if (ColorUp == Coloring::None) { + ColorUp = (I < Num/2) ? Coloring::Red : Coloring::Black; + } + unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2; + if (C == ColorUp) { + if (I < Num/2) + T[I][Step] = Pass; + else + T[CI][Step] = Switch; + T[J][Pets] = (J < Num/2) ? Pass : Switch; + UseUp = true; + } else { // Down + if (I < Num/2) + T[CI][Step] = Switch; + else + T[I][Step] = Pass; + T[J][Pets] = (J < Num/2) ? Switch : Pass; + UseDown = true; + } + } + + // Reorder the working permutation according to the computed switch table + // for the last step (i.e. Pets). + for (ElemType J = 0; J != Num/2; ++J) { + ElemType PJ = P[J]; // Current values of P[J] + ElemType PC = P[J+Num/2]; // and P[conj(J)] + ElemType QJ = PJ; // New values of P[J] + ElemType QC = PC; // and P[conj(J)] + if (T[J][Pets] == Switch) + QC = PJ; + if (T[J+Num/2][Pets] == Switch) + QJ = PC; + P[J] = QJ; + P[J+Num/2] = QC; + } + + for (ElemType J = 0; J != Num; ++J) + if (P[J] != Ignore && P[J] >= Num/2) + P[J] -= Num/2; + + if (Step+1 < Log) { + if (UseUp && !route(P, T, Size/2, Step+1)) + return false; + if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) + return false; + } + return true; +} + +// -------------------------------------------------------------------- +// Support for building selection results (output instructions that are +// parts of the final selection). 
+ +struct OpRef { + OpRef(SDValue V) : OpV(V) {} + bool isValue() const { return OpV.getNode() != nullptr; } + bool isValid() const { return isValue() || !(OpN & Invalid); } + static OpRef res(int N) { return OpRef(Whole | (N & Index)); } + static OpRef fail() { return OpRef(Invalid); } + + static OpRef lo(const OpRef &R) { + assert(!R.isValue()); + return OpRef(R.OpN & (Undef | Index | LoHalf)); + } + static OpRef hi(const OpRef &R) { + assert(!R.isValue()); + return OpRef(R.OpN & (Undef | Index | HiHalf)); + } + static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); } + + // Direct value. + SDValue OpV = SDValue(); + + // Reference to the operand of the input node: + // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the + // operand index: + // If bit 30 is set, it's the high half of the operand. + // If bit 29 is set, it's the low half of the operand. + unsigned OpN = 0; + + enum : unsigned { + Invalid = 0x10000000, + LoHalf = 0x20000000, + HiHalf = 0x40000000, + Whole = LoHalf | HiHalf, + Undef = 0x80000000, + Index = 0x0FFFFFFF, // Mask of the index value. 
+ IndexBits = 28, + }; + + void print(raw_ostream &OS, const SelectionDAG &G) const; + +private: + OpRef(unsigned N) : OpN(N) {} +}; + +struct NodeTemplate { + NodeTemplate() = default; + unsigned Opc = 0; + MVT Ty = MVT::Other; + std::vector<OpRef> Ops; + + void print(raw_ostream &OS, const SelectionDAG &G) const; +}; + +struct ResultStack { + ResultStack(SDNode *Inp) + : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {} + SDNode *InpNode; + MVT InpTy; + unsigned push(const NodeTemplate &Res) { + List.push_back(Res); + return List.size()-1; + } + unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) { + NodeTemplate Res; + Res.Opc = Opc; + Res.Ty = Ty; + Res.Ops = Ops; + return push(Res); + } + bool empty() const { return List.empty(); } + unsigned size() const { return List.size(); } + unsigned top() const { return size()-1; } + const NodeTemplate &operator[](unsigned I) const { return List[I]; } + unsigned reset(unsigned NewTop) { + List.resize(NewTop+1); + return NewTop; + } + + using BaseType = std::vector<NodeTemplate>; + BaseType::iterator begin() { return List.begin(); } + BaseType::iterator end() { return List.end(); } + BaseType::const_iterator begin() const { return List.begin(); } + BaseType::const_iterator end() const { return List.end(); } + + BaseType List; + + void print(raw_ostream &OS, const SelectionDAG &G) const; +}; + +void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const { + if (isValue()) { + OpV.getNode()->print(OS, &G); + return; + } + if (OpN & Invalid) { + OS << "invalid"; + return; + } + if (OpN & Undef) { + OS << "undef"; + return; + } + if ((OpN & Whole) != Whole) { + assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf); + if (OpN & LoHalf) + OS << "lo "; + else + OS << "hi "; + } + OS << '#' << SignExtend32(OpN & Index, IndexBits); +} + +void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const { + const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo(); + OS << format("%8s", 
EVT(Ty).getEVTString().c_str()) << " " + << TII.getName(Opc); + bool Comma = false; + for (const auto &R : Ops) { + if (Comma) + OS << ','; + Comma = true; + OS << ' '; + R.print(OS, G); + } +} + +void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const { + OS << "Input node:\n"; + InpNode->dumpr(&G); + OS << "Result templates:\n"; + for (unsigned I = 0, E = List.size(); I != E; ++I) { + OS << '[' << I << "] "; + List[I].print(OS, G); + OS << '\n'; + } +} + +struct ShuffleMask { + ShuffleMask(ArrayRef<int> M) : Mask(M) { + for (unsigned I = 0, E = Mask.size(); I != E; ++I) { + int M = Mask[I]; + if (M == -1) + continue; + MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M); + MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M); + } + } + + ArrayRef<int> Mask; + int MinSrc = -1, MaxSrc = -1; + + ShuffleMask lo() const { + size_t H = Mask.size()/2; + return ShuffleMask({Mask.data(), H}); + } + ShuffleMask hi() const { + size_t H = Mask.size()/2; + return ShuffleMask({Mask.data()+H, H}); + } +}; + +// -------------------------------------------------------------------- +// The HvxSelector class. 
+ +static const HexagonTargetLowering &getHexagonLowering(SelectionDAG &G) { + return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo()); +} +static const HexagonSubtarget &getHexagonSubtarget(SelectionDAG &G) { + return static_cast<const HexagonSubtarget&>(G.getSubtarget()); +} + +namespace llvm { + struct HvxSelector { + const HexagonTargetLowering &Lower; + HexagonDAGToDAGISel &ISel; + SelectionDAG &DAG; + const HexagonSubtarget &HST; + const unsigned HwLen; + + HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G) + : Lower(getHexagonLowering(G)), ISel(HS), DAG(G), + HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {} + + MVT getSingleVT(MVT ElemTy) const { + unsigned NumElems = HwLen / (ElemTy.getSizeInBits()/8); + return MVT::getVectorVT(ElemTy, NumElems); + } + + MVT getPairVT(MVT ElemTy) const { + unsigned NumElems = (2*HwLen) / (ElemTy.getSizeInBits()/8); + return MVT::getVectorVT(ElemTy, NumElems); + } + + void selectShuffle(SDNode *N); + void selectRor(SDNode *N); + + private: + void materialize(const ResultStack &Results); + + SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl); + + enum : unsigned { + None, + PackMux, + }; + OpRef concat(OpRef Va, OpRef Vb, ResultStack &Results); + OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results, + MutableArrayRef<int> NewMask, unsigned Options = None); + OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results, + MutableArrayRef<int> NewMask); + OpRef zerous(ShuffleMask SM, OpRef Va, ResultStack &Results); + OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, + ResultStack &Results); + OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, + ResultStack &Results); + + OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results); + OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results); + OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results); + OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack 
&Results); + + OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results); + OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results); + OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results); + OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results); + + bool selectVectorConstants(SDNode *N); + bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy, + SDValue Va, SDValue Vb, SDNode *N); + + }; +} + +// Return a submask of A that is shorter than A by |C| elements: +// - if C > 0, return a submask of A that starts at position C, +// - if C <= 0, return a submask of A that starts at 0 (reduce A by |C|). +static ArrayRef<int> subm(ArrayRef<int> A, int C) { + if (C > 0) + return { A.data()+C, A.size()-C }; + return { A.data(), A.size()+C }; +} + +static void splitMask(ArrayRef<int> Mask, MutableArrayRef<int> MaskL, + MutableArrayRef<int> MaskR) { + unsigned VecLen = Mask.size(); + assert(MaskL.size() == VecLen && MaskR.size() == VecLen); + for (unsigned I = 0; I != VecLen; ++I) { + int M = Mask[I]; + if (M < 0) { + MaskL[I] = MaskR[I] = -1; + } else if (unsigned(M) < VecLen) { + MaskL[I] = M; + MaskR[I] = -1; + } else { + MaskL[I] = -1; + MaskR[I] = M-VecLen; + } + } +} + +static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc, + unsigned MaxLen) { + assert(A.size() > 0 && A.size() >= MaxLen); + int F = A[0]; + int E = F; + for (unsigned I = 1; I != MaxLen; ++I) { + if (A[I] - E != Inc) + return { F, I }; + E = A[I]; + } + return { F, MaxLen }; +} + +static bool isUndef(ArrayRef<int> Mask) { + for (int Idx : Mask) + if (Idx != -1) + return false; + return true; +} + +static bool isIdentity(ArrayRef<int> Mask) { + unsigned Size = Mask.size(); + return findStrip(Mask, 1, Size) == std::make_pair(0, Size); +} + +static bool isPermutation(ArrayRef<int> Mask) { + // Check by adding all numbers only works if there is no overflow. 
+ assert(Mask.size() < 0x00007FFF && "Sanity failure"); + int Sum = 0; + for (int Idx : Mask) { + if (Idx == -1) + return false; + Sum += Idx; + } + int N = Mask.size(); + return 2*Sum == N*(N-1); +} + +bool HvxSelector::selectVectorConstants(SDNode *N) { + // Constant vectors are generated as loads from constant pools. + // Since they are generated during the selection process, the main + // selection algorithm is not aware of them. Select them directly + // here. + if (!N->isMachineOpcode() && N->getOpcode() == ISD::LOAD) { + SDValue Addr = cast<LoadSDNode>(N)->getBasePtr(); + unsigned AddrOpc = Addr.getOpcode(); + if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP) { + if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool) { + ISel.Select(N); + return true; + } + } + } + + bool Selected = false; + for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) + Selected = selectVectorConstants(N->getOperand(I).getNode()) || Selected; + return Selected; +} + +void HvxSelector::materialize(const ResultStack &Results) { + DEBUG_WITH_TYPE("isel", { + dbgs() << "Materializing\n"; + Results.print(dbgs(), DAG); + }); + if (Results.empty()) + return; + const SDLoc &dl(Results.InpNode); + std::vector<SDValue> Output; + + for (unsigned I = 0, E = Results.size(); I != E; ++I) { + const NodeTemplate &Node = Results[I]; + std::vector<SDValue> Ops; + for (const OpRef &R : Node.Ops) { + assert(R.isValid()); + if (R.isValue()) { + Ops.push_back(R.OpV); + continue; + } + if (R.OpN & OpRef::Undef) { + MVT::SimpleValueType SVT = MVT::SimpleValueType(R.OpN & OpRef::Index); + Ops.push_back(ISel.selectUndef(dl, MVT(SVT))); + continue; + } + // R is an index of a result. 
+ unsigned Part = R.OpN & OpRef::Whole; + int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits); + if (Idx < 0) + Idx += I; + assert(Idx >= 0 && unsigned(Idx) < Output.size()); + SDValue Op = Output[Idx]; + MVT OpTy = Op.getValueType().getSimpleVT(); + if (Part != OpRef::Whole) { + assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf); + if (Op.getOpcode() == HexagonISD::VCOMBINE) { + Op = (Part == OpRef::HiHalf) ? Op.getOperand(0) : Op.getOperand(1); + } else { + MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(), + OpTy.getVectorNumElements()/2); + unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo + : Hexagon::vsub_hi; + Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op); + } + } + Ops.push_back(Op); + } // for (Node : Results) + + assert(Node.Ty != MVT::Other); + SDNode *ResN = (Node.Opc == TargetOpcode::COPY) + ? Ops.front().getNode() + : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops); + Output.push_back(SDValue(ResN, 0)); + } + + SDNode *OutN = Output.back().getNode(); + SDNode *InpN = Results.InpNode; + DEBUG_WITH_TYPE("isel", { + dbgs() << "Generated node:\n"; + OutN->dumpr(&DAG); + }); + + ISel.ReplaceNode(InpN, OutN); + selectVectorConstants(OutN); + DAG.RemoveDeadNodes(); +} + +OpRef HvxSelector::concat(OpRef Lo, OpRef Hi, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + const SDLoc &dl(Results.InpNode); + Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), { + DAG.getTargetConstant(Hexagon::HvxWRRegClassID, dl, MVT::i32), + Lo, DAG.getTargetConstant(Hexagon::vsub_lo, dl, MVT::i32), + Hi, DAG.getTargetConstant(Hexagon::vsub_hi, dl, MVT::i32), + }); + return OpRef::res(Results.top()); +} + +// Va, Vb are single vectors, SM can be arbitrarily long. 
+OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb, + ResultStack &Results, MutableArrayRef<int> NewMask, + unsigned Options) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + if (!Va.isValid() || !Vb.isValid()) + return OpRef::fail(); + + int VecLen = SM.Mask.size(); + MVT Ty = getSingleVT(MVT::i8); + + if (SM.MaxSrc - SM.MinSrc < int(HwLen)) { + if (SM.MaxSrc < int(HwLen)) { + memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen); + return Va; + } + if (SM.MinSrc >= int(HwLen)) { + for (int I = 0; I != VecLen; ++I) { + int M = SM.Mask[I]; + if (M != -1) + M -= HwLen; + NewMask[I] = M; + } + return Vb; + } + const SDLoc &dl(Results.InpNode); + SDValue S = DAG.getTargetConstant(SM.MinSrc, dl, MVT::i32); + if (isUInt<3>(SM.MinSrc)) { + Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, S}); + } else { + Results.push(Hexagon::A2_tfrsi, MVT::i32, {S}); + unsigned Top = Results.top(); + Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(Top)}); + } + for (int I = 0; I != VecLen; ++I) { + int M = SM.Mask[I]; + if (M != -1) + M -= SM.MinSrc; + NewMask[I] = M; + } + return OpRef::res(Results.top()); + } + + if (Options & PackMux) { + // If elements picked from Va and Vb have all different (source) indexes + // (relative to the start of the argument), do a mux, and update the mask. 
+ BitVector Picked(HwLen); + SmallVector<uint8_t,128> MuxBytes(HwLen); + bool CanMux = true; + for (int I = 0; I != VecLen; ++I) { + int M = SM.Mask[I]; + if (M == -1) + continue; + if (M >= int(HwLen)) + M -= HwLen; + else + MuxBytes[M] = 0xFF; + if (Picked[M]) { + CanMux = false; + break; + } + NewMask[I] = M; + } + if (CanMux) + return vmuxs(MuxBytes, Va, Vb, Results); + } + + return OpRef::fail(); +} + +OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb, + ResultStack &Results, MutableArrayRef<int> NewMask) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + unsigned HalfMask = 0; + unsigned LogHw = Log2_32(HwLen); + for (int M : SM.Mask) { + if (M == -1) + continue; + HalfMask |= (1u << (M >> LogHw)); + } + + if (HalfMask == 0) + return OpRef::undef(getPairVT(MVT::i8)); + + // If more than two halves are used, bail. + // TODO: be more aggressive here? + if (countPopulation(HalfMask) > 2) + return OpRef::fail(); + + MVT HalfTy = getSingleVT(MVT::i8); + + OpRef Inp[2] = { Va, Vb }; + OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) }; + + uint8_t HalfIdx[4] = { 0xFF, 0xFF, 0xFF, 0xFF }; + unsigned Idx = 0; + for (unsigned I = 0; I != 4; ++I) { + if ((HalfMask & (1u << I)) == 0) + continue; + assert(Idx < 2); + OpRef Op = Inp[I/2]; + Out[Idx] = (I & 1) ? 
OpRef::hi(Op) : OpRef::lo(Op); + HalfIdx[I] = Idx++; + } + + int VecLen = SM.Mask.size(); + for (int I = 0; I != VecLen; ++I) { + int M = SM.Mask[I]; + if (M >= 0) { + uint8_t Idx = HalfIdx[M >> LogHw]; + assert(Idx == 0 || Idx == 1); + M = (M & (HwLen-1)) + HwLen*Idx; + } + NewMask[I] = M; + } + + return concat(Out[0], Out[1], Results); +} + +OpRef HvxSelector::zerous(ShuffleMask SM, OpRef Va, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + + int VecLen = SM.Mask.size(); + SmallVector<uint8_t,128> UsedBytes(VecLen); + bool HasUnused = false; + for (int I = 0; I != VecLen; ++I) { + if (SM.Mask[I] != -1) + UsedBytes[I] = 0xFF; + else + HasUnused = true; + } + if (!HasUnused) + return Va; + SDValue B = getVectorConstant(UsedBytes, SDLoc(Results.InpNode)); + Results.push(Hexagon::V6_vand, getSingleVT(MVT::i8), {Va, OpRef(B)}); + return OpRef::res(Results.top()); +} + +OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, + ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + MVT ByteTy = getSingleVT(MVT::i8); + MVT BoolTy = MVT::getVectorVT(MVT::i1, 8*HwLen); // XXX + const SDLoc &dl(Results.InpNode); + SDValue B = getVectorConstant(Bytes, dl); + Results.push(Hexagon::V6_vd0, ByteTy, {}); + Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)}); + Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Va, Vb}); + return OpRef::res(Results.top()); +} + +OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, + ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + size_t S = Bytes.size() / 2; + OpRef L = vmuxs({Bytes.data(), S}, OpRef::lo(Va), OpRef::lo(Vb), Results); + OpRef H = vmuxs({Bytes.data()+S, S}, OpRef::hi(Va), OpRef::hi(Vb), Results); + return concat(L, H, Results); +} + +OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + unsigned VecLen 
= SM.Mask.size(); + assert(HwLen == VecLen); + assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); })); + + if (isIdentity(SM.Mask)) + return Va; + if (isUndef(SM.Mask)) + return OpRef::undef(getSingleVT(MVT::i8)); + + return butterfly(SM, Va, Results); +} + +OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, + ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + OpRef C = contracting(SM, Va, Vb, Results); + if (C.isValid()) + return C; + + int VecLen = SM.Mask.size(); + SmallVector<int,128> NewMask(VecLen); + OpRef P = packs(SM, Va, Vb, Results, NewMask); + if (P.isValid()) + return shuffs1(ShuffleMask(NewMask), P, Results); + + SmallVector<int,128> MaskL(VecLen), MaskR(VecLen); + splitMask(SM.Mask, MaskL, MaskR); + + OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results); + OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results); + if (!L.isValid() || !R.isValid()) + return OpRef::fail(); + + SmallVector<uint8_t,128> Bytes(VecLen); + for (int I = 0; I != VecLen; ++I) { + if (MaskL[I] != -1) + Bytes[I] = 0xFF; + } + return vmuxs(Bytes, L, R, Results); +} + +OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + int VecLen = SM.Mask.size(); + + SmallVector<int,128> PackedMask(VecLen); + OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask); + if (P.isValid()) { + ShuffleMask PM(PackedMask); + OpRef E = expanding(PM, P, Results); + if (E.isValid()) + return E; + + OpRef L = shuffs1(PM.lo(), P, Results); + OpRef H = shuffs1(PM.hi(), P, Results); + if (L.isValid() && H.isValid()) + return concat(L, H, Results); + } + + OpRef R = perfect(SM, Va, Results); + if (R.isValid()) + return R; + // TODO commute the mask and try the opposite order of the halves. 
+ + OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results); + OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results); + if (L.isValid() && H.isValid()) + return concat(L, H, Results); + + return OpRef::fail(); +} + +OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, + ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + int VecLen = SM.Mask.size(); + + SmallVector<int,256> PackedMask(VecLen); + OpRef P = packp(SM, Va, Vb, Results, PackedMask); + if (P.isValid()) + return shuffp1(ShuffleMask(PackedMask), P, Results); + + SmallVector<int,256> MaskL(VecLen), MaskR(VecLen); + OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results); + OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results); + if (!L.isValid() || !R.isValid()) + return OpRef::fail(); + + // Mux the results. + SmallVector<uint8_t,256> Bytes(VecLen); + for (int I = 0; I != VecLen; ++I) { + if (MaskL[I] != -1) + Bytes[I] = 0xFF; + } + return vmuxp(Bytes, L, R, Results); +} + +bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, + MVT ResTy, SDValue Va, SDValue Vb, + SDNode *N) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + MVT ElemTy = ResTy.getVectorElementType(); + assert(ElemTy == MVT::i8); + unsigned VecLen = Mask.size(); + bool HavePairs = (2*HwLen == VecLen); + MVT SingleTy = getSingleVT(MVT::i8); + + SmallVector<SDValue,128> Ops; + for (int I : Mask) { + if (I < 0) { + Ops.push_back(ISel.selectUndef(dl, ElemTy)); + continue; + } + SDValue Vec; + unsigned M = I; + if (M < VecLen) { + Vec = Va; + } else { + Vec = Vb; + M -= VecLen; + } + if (HavePairs) { + if (M < HwLen) { + Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec); + } else { + Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec); + M -= HwLen; + } + } + SDValue Idx = DAG.getConstant(M, dl, MVT::i32); + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemTy, {Vec, Idx}); + SDValue L = Lower.LowerOperation(Ex, 
DAG); + assert(L.getNode()); + Ops.push_back(L); + } + + SDValue LV; + if (2*HwLen == VecLen) { + SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen}); + SDValue L0 = Lower.LowerOperation(B0, DAG); + SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen}); + SDValue L1 = Lower.LowerOperation(B1, DAG); + // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering) + // functions may expect to be called only for illegal operations, so + // make sure that they are not called for legal ones. Develop a better + // mechanism for dealing with this. + LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1}); + } else { + SDValue BV = DAG.getBuildVector(ResTy, dl, Ops); + LV = Lower.LowerOperation(BV, DAG); + } + + assert(!N->use_empty()); + ISel.ReplaceNode(N, LV.getNode()); + DAG.RemoveDeadNodes(); + + std::deque<SDNode*> SubNodes; + SubNodes.push_back(LV.getNode()); + for (unsigned I = 0; I != SubNodes.size(); ++I) { + for (SDValue Op : SubNodes[I]->ops()) + SubNodes.push_back(Op.getNode()); + } + while (!SubNodes.empty()) { + SDNode *S = SubNodes.front(); + SubNodes.pop_front(); + if (S->use_empty()) + continue; + // This isn't great, but users need to be selected before any nodes that + // they use. (The reason is to match larger patterns, and avoid nodes that + // cannot be matched on their own, e.g. ValueType, TokenFactor, etc.). + bool PendingUser = llvm::any_of(S->uses(), [&SubNodes](const SDNode *U) { + return llvm::any_of(SubNodes, [U](const SDNode *T) { + return T == U; + }); + }); + if (PendingUser) + SubNodes.push_back(S); + else + ISel.Select(S); + } + + DAG.RemoveDeadNodes(); + return true; +} + +OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb, + ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + if (!Va.isValid() || !Vb.isValid()) + return OpRef::fail(); + + // Contracting shuffles, i.e. instructions that always discard some bytes + // from the operand vectors. 
+ // + // V6_vshuff{e,o}b + // V6_vdealb4w + // V6_vpack{e,o}{b,h} + + int VecLen = SM.Mask.size(); + std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen); + MVT ResTy = getSingleVT(MVT::i8); + + // The following shuffles only work for bytes and halfwords. This requires + // the strip length to be 1 or 2. + if (Strip.second != 1 && Strip.second != 2) + return OpRef::fail(); + + // The patterns for the shuffles, in terms of the starting offsets of the + // consecutive strips (L = length of the strip, N = VecLen): + // + // vpacke: 0, 2L, 4L ... N+0, N+2L, N+4L ... L = 1 or 2 + // vpacko: L, 3L, 5L ... N+L, N+3L, N+5L ... L = 1 or 2 + // + // vshuffe: 0, N+0, 2L, N+2L, 4L ... L = 1 or 2 + // vshuffo: L, N+L, 3L, N+3L, 5L ... L = 1 or 2 + // + // vdealb4w: 0, 4, 8 ... 2, 6, 10 ... N+0, N+4, N+8 ... N+2, N+6, N+10 ... + + // The value of the element in the mask following the strip will decide + // what kind of a shuffle this can be. + int NextInMask = SM.Mask[Strip.second]; + + // Check if NextInMask could be 2L, 3L or 4, i.e. if it could be a mask + // for vpack or vdealb4w. VecLen > 4, so NextInMask for vdealb4w would + // satisfy this. + if (NextInMask < VecLen) { + // vpack{e,o} or vdealb4w + if (Strip.first == 0 && Strip.second == 1 && NextInMask == 4) { + int N = VecLen; + // Check if this is vdealb4w (L=1). + for (int I = 0; I != N/4; ++I) + if (SM.Mask[I] != 4*I) + return OpRef::fail(); + for (int I = 0; I != N/4; ++I) + if (SM.Mask[I+N/4] != 2 + 4*I) + return OpRef::fail(); + for (int I = 0; I != N/4; ++I) + if (SM.Mask[I+N/2] != N + 4*I) + return OpRef::fail(); + for (int I = 0; I != N/4; ++I) + if (SM.Mask[I+3*N/4] != N+2 + 4*I) + return OpRef::fail(); + // Matched mask for vdealb4w. + Results.push(Hexagon::V6_vdealb4w, ResTy, {Vb, Va}); + return OpRef::res(Results.top()); + } + + // Check if this is vpack{e,o}. + int N = VecLen; + int L = Strip.second; + // Check if the first strip starts at 0 or at L. 
+ if (Strip.first != 0 && Strip.first != L) + return OpRef::fail(); + // Examine the rest of the mask. + for (int I = L; I < N/2; I += L) { + auto S = findStrip(subm(SM.Mask,I), 1, N-I); + // Check whether the mask element at the beginning of each strip + // increases by 2L each time. + if (S.first - Strip.first != 2*I) + return OpRef::fail(); + // Check whether each strip is of the same length. + if (S.second != unsigned(L)) + return OpRef::fail(); + } + + // Strip.first == 0 => vpacke + // Strip.first == L => vpacko + assert(Strip.first == 0 || Strip.first == L); + using namespace Hexagon; + NodeTemplate Res; + Res.Opc = Strip.second == 1 // Number of bytes. + ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob) + : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh); + Res.Ty = ResTy; + Res.Ops = { Vb, Va }; + Results.push(Res); + return OpRef::res(Results.top()); + } + + // Check if this is vshuff{e,o}. + int N = VecLen; + int L = Strip.second; + std::pair<int,unsigned> PrevS = Strip; + bool Flip = false; + for (int I = L; I < N; I += L) { + auto S = findStrip(subm(SM.Mask,I), 1, N-I); + if (S.second != PrevS.second) + return OpRef::fail(); + int Diff = Flip ? PrevS.first - S.first + 2*L + : S.first - PrevS.first; + if (Diff != N) + return OpRef::fail(); + Flip ^= true; + PrevS = S; + } + // Strip.first == 0 => vshuffe + // Strip.first == L => vshuffo + assert(Strip.first == 0 || Strip.first == L); + using namespace Hexagon; + NodeTemplate Res; + Res.Opc = Strip.second == 1 // Number of bytes. + ? (Strip.first == 0 ? V6_vshuffeb : V6_vshuffob) + : (Strip.first == 0 ? 
V6_vshufeh : V6_vshufoh); + Res.Ty = ResTy; + Res.Ops = { Vb, Va }; + Results.push(Res); + return OpRef::res(Results.top()); +} + +OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + // Expanding shuffles (using all elements and inserting into larger vector): + // + // V6_vunpacku{b,h} [*] + // + // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask. + // + // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so + // they are not shuffles. + // + // The argument is a single vector. + + int VecLen = SM.Mask.size(); + assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type"); + + std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen); + + // The patterns for the unpacks, in terms of the starting offsets of the + // consecutive strips (L = length of the strip, N = VecLen): + // + // vunpacku: 0, -1, L, -1, 2L, -1 ... + + if (Strip.first != 0) + return OpRef::fail(); + + // The vunpackus only handle byte and half-word. + if (Strip.second != 1 && Strip.second != 2) + return OpRef::fail(); + + int N = VecLen; + int L = Strip.second; + + // First, check the non-ignored strips. + for (int I = 2*L; I < 2*N; I += 2*L) { + auto S = findStrip(subm(SM.Mask,I), 1, N-I); + if (S.second != unsigned(L)) + return OpRef::fail(); + if (2*S.first != I) + return OpRef::fail(); + } + // Check the -1s. + for (int I = L; I < 2*N; I += 2*L) { + auto S = findStrip(subm(SM.Mask,I), 0, N-I); + if (S.first != -1 || S.second != unsigned(L)) + return OpRef::fail(); + } + + unsigned Opc = Strip.second == 1 ? 
Hexagon::V6_vunpackub
+                                   : Hexagon::V6_vunpackuh;
+  Results.push(Opc, getPairVT(MVT::i8), {Va});
+  return OpRef::res(Results.top());
+}
+
+OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
+  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+  // V6_vdeal{b,h}
+  // V6_vshuff{b,h}
+
+  // V6_vshufoe{b,h}  those are equivalent to vshuffvdd(..,{1,2})
+  // V6_vshuffvdd (V6_vshuff)
+  // V6_dealvdd (V6_vdeal)
+
+  // TODO Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
+
+  int VecLen = SM.Mask.size();
+  assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
+  unsigned LogLen = Log2_32(VecLen);
+
+  if (!isPermutation(SM.Mask))
+    return OpRef::fail();
+
+  SmallVector<unsigned,8> Perm(LogLen);
+
+  // Check if this could be a perfect shuffle, or a combination of perfect
+  // shuffles.
+  //
+  // Consider this permutation (using hex digits to make the ASCII diagrams
+  // easier to read):
+  //   { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
+  // This is a "deal" operation: divide the input into two halves, and
+  // create the output by picking elements by alternating between these two
+  // halves:
+  //   0 1 2 3 4 5 6 7    -->    0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F  [*]
+  //   8 9 A B C D E F
+  //
+  // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
+  // a somewhat different mechanism that could be used to perform shuffle/
+  // deal operations: a 2x2 transpose.
+  // Consider the halves of inputs again, they can be interpreted as a 2x8
+  // matrix. A 2x8 matrix can be looked at as four 2x2 matrices concatenated
+  // together. Now, when considering 2 elements at a time, it will be a 2x4
+  // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
+  //   01 23  45 67
+  //   89 AB  CD EF
+  // With groups of 4, this will become a single 2x2 matrix, and so on.
+  //
+  // The 2x2 transpose instruction works by transposing each of the 2x2
+  // matrices (or "sub-matrices"), given a specific group size. 
For example,
+  // if the group size is 1 (i.e. each element is its own group), there
+  // will be four transposes of the four 2x2 matrices that form the 2x8.
+  // For example, with the inputs as above, the result will be:
+  //   0 8 2 A 4 C 6 E
+  //   1 9 3 B 5 D 7 F
+  // Now, this result can be transposed again, but with the group size of 2:
+  //   08 19  4C 5D
+  //   2A 3B  6E 7F
+  // If we then transpose that result, but with the group size of 4, we get:
+  //   0819 2A3B
+  //   4C5D 6E7F
+  // If we concatenate these two rows, it will be
+  //   0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
+  // which is the same as the "deal" [*] above.
+  //
+  // In general, a "deal" of individual elements is a series of 2x2 transposes,
+  // with changing group size. HVX has two instructions:
+  //   Vdd = V6_vdealvdd Vu, Vv, Rt
+  //   Vdd = V6_shufvdd  Vu, Vv, Rt
+  // that perform exactly that. The register Rt controls which transposes are
+  // going to happen: a bit at position n (counting from 0) indicates that a
+  // transpose with a group size of 2^n will take place. If multiple bits are
+  // set, multiple transposes will happen: vdealvdd will perform them starting
+  // with the largest group size, vshuffvdd will do them in the reverse order.
+  //
+  // The main observation is that each 2x2 transpose corresponds to swapping
+  // columns of bits in the binary representation of the values.
+  //
+  // The numbers {3,2,1,0} are the log2 of the number of contiguous 1 bits
+  // in a given column. The * denote the columns that will be swapped. 
+ // The transpose with the group size 2^n corresponds to swapping columns + // 3 (the highest log) and log2(n): + // + // 3 2 1 0 0 2 1 3 0 2 3 1 + // * * * * * * + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0 + // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1 + // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1 + // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0 + // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0 + // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1 + // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1 + // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0 + // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0 + // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1 + // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1 + // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0 + // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0 + // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1 + // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 + + auto XorPow2 = [] (ArrayRef<int> Mask, unsigned Num) { + unsigned X = Mask[0] ^ Mask[Num/2]; + // Check that the first half has the X's bits clear. + if ((Mask[0] & X) != 0) + return 0u; + for (unsigned I = 1; I != Num/2; ++I) { + if (unsigned(Mask[I] ^ Mask[I+Num/2]) != X) + return 0u; + if ((Mask[I] & X) != 0) + return 0u; + } + return X; + }; + + // Create a vector of log2's for each column: Perm[i] corresponds to + // the i-th bit (lsb is 0). + assert(VecLen > 2); + for (unsigned I = VecLen; I >= 2; I >>= 1) { + // Examine the initial segment of Mask of size I. + unsigned X = XorPow2(SM.Mask, I); + if (!isPowerOf2_32(X)) + return OpRef::fail(); + // Check the other segments of Mask. + for (int J = 0; J < VecLen; J += I) { + if (XorPow2(subm(SM.Mask, -J), I) != X) + return OpRef::fail(); + } + Perm[Log2_32(X)] = Log2_32(I)-1; + } + + // Once we have Perm, represent it as cycles. Denote the maximum log2 + // (equal to log2(VecLen)-1) as M. The cycle containing M can then be + // written as (M a1 a2 a3 ... an). 
That cycle can be broken up into + // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition + // order being from left to right. Any (contiguous) segment where the + // values ai, ai+1...aj are either all increasing or all decreasing, + // can be implemented via a single vshuffvdd/vdealvdd respectively. + // + // If there is a cycle (a1 a2 ... an) that does not involve M, it can + // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can + // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure + // can be used to generate a sequence of vshuffvdd/vdealvdd. + // + // Example: + // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written + // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply + // (4 0 1)(4 0)(4 2 3)(4 2). + // It can then be expanded into swaps as + // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2), + // and broken up into "increasing" segments as + // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)]. + // This is equivalent to + // (4 0 1)(4 0 2 3)(4 2), + // which can be implemented as 3 vshufvdd instructions. + + using CycleType = SmallVector<unsigned,8>; + std::set<CycleType> Cycles; + std::set<unsigned> All; + + for (unsigned I : Perm) + All.insert(I); + + // If the cycle contains LogLen-1, move it to the front of the cycle. + // Otherwise, return the cycle unchanged. 
+ auto canonicalize = [LogLen](const CycleType &C) -> CycleType { + unsigned LogPos, N = C.size(); + for (LogPos = 0; LogPos != N; ++LogPos) + if (C[LogPos] == LogLen-1) + break; + if (LogPos == N) + return C; + + CycleType NewC(C.begin()+LogPos, C.end()); + NewC.append(C.begin(), C.begin()+LogPos); + return NewC; + }; + + while (!All.empty()) { + unsigned A = *All.begin(); + All.erase(A); + CycleType C; + C.push_back(A); + for (unsigned B = Perm[A]; B != A; B = Perm[B]) { + C.push_back(B); + All.erase(B); + } + if (C.size() <= 1) + continue; + Cycles.insert(canonicalize(C)); + } + + SmallVector<unsigned,8> SwapElems; + if (HwLen == unsigned(VecLen)) + SwapElems.push_back(LogLen-1); + + for (const CycleType &C : Cycles) { + unsigned First = (C[0] == LogLen-1) ? 1 : 0; + SwapElems.append(C.begin()+First, C.end()); + if (First == 0) + SwapElems.push_back(C[0]); + } + + const SDLoc &dl(Results.InpNode); + OpRef Arg = Va; + MVT PairTy = getPairVT(MVT::i8); + + for (unsigned I = 0, E = SwapElems.size(); I != E; ) { + bool IsInc = I == E-1 || SwapElems[I] < SwapElems[I+1]; + unsigned S = (1u << SwapElems[I]); + if (I < E-1) { + while (++I < E-1 && IsInc == (SwapElems[I] < SwapElems[I+1])) + S |= 1u << SwapElems[I]; + // The above loop will not add a bit for the final SwapElems[I+1], + // so add it here. + S |= 1u << SwapElems[I]; + } + ++I; + + NodeTemplate Res; + Results.push(Hexagon::A2_tfrsi, MVT::i32, + { DAG.getTargetConstant(S, dl, MVT::i32) }); + Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd; + Res.Ty = PairTy; + Res.Ops = { OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1) }; + Results.push(Res); + Arg = OpRef::res(Results.top()); + } + + return Arg; +} + +OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + // Butterfly shuffles. 
+ // + // V6_vdelta + // V6_vrdelta + // V6_vror + + // The assumption here is that all elements picked by Mask are in the + // first operand to the vector_shuffle. This assumption is enforced + // by the caller. + + MVT ResTy = getSingleVT(MVT::i8); + PermNetwork::Controls FC, RC; + const SDLoc &dl(Results.InpNode); + int VecLen = SM.Mask.size(); + + for (int M : SM.Mask) { + if (M != -1 && M >= VecLen) + return OpRef::fail(); + } + + // Try the deltas/benes for both single vectors and vector pairs. + ForwardDeltaNetwork FN(SM.Mask); + if (FN.run(FC)) { + SDValue Ctl = getVectorConstant(FC, dl); + Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)}); + return OpRef::res(Results.top()); + } + + // Try reverse delta. + ReverseDeltaNetwork RN(SM.Mask); + if (RN.run(RC)) { + SDValue Ctl = getVectorConstant(RC, dl); + Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)}); + return OpRef::res(Results.top()); + } + + // Do Benes. + BenesNetwork BN(SM.Mask); + if (BN.run(FC, RC)) { + SDValue CtlF = getVectorConstant(FC, dl); + SDValue CtlR = getVectorConstant(RC, dl); + Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)}); + Results.push(Hexagon::V6_vrdelta, ResTy, + {OpRef::res(-1), OpRef(CtlR)}); + return OpRef::res(Results.top()); + } + + return OpRef::fail(); +} + +SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data, + const SDLoc &dl) { + SmallVector<SDValue, 128> Elems; + for (uint8_t C : Data) + Elems.push_back(DAG.getConstant(C, dl, MVT::i8)); + MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size()); + SDValue BV = DAG.getBuildVector(VecTy, dl, Elems); + SDValue LV = Lower.LowerOperation(BV, DAG); + DAG.RemoveDeadNode(BV.getNode()); + return LV; +} + +void HvxSelector::selectShuffle(SDNode *N) { + DEBUG_WITH_TYPE("isel", { + dbgs() << "Starting " << __func__ << " on node:\n"; + N->dump(&DAG); + }); + MVT ResTy = N->getValueType(0).getSimpleVT(); + // Assume that vector shuffles operate on vectors of bytes. 
+ assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8); + + auto *SN = cast<ShuffleVectorSDNode>(N); + std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end()); + // This shouldn't really be necessary. Is it? + for (int &Idx : Mask) + if (Idx != -1 && Idx < 0) + Idx = -1; + + unsigned VecLen = Mask.size(); + bool HavePairs = (2*HwLen == VecLen); + assert(ResTy.getSizeInBits() / 8 == VecLen); + + // Vd = vector_shuffle Va, Vb, Mask + // + + bool UseLeft = false, UseRight = false; + for (unsigned I = 0; I != VecLen; ++I) { + if (Mask[I] == -1) + continue; + unsigned Idx = Mask[I]; + assert(Idx < 2*VecLen); + if (Idx < VecLen) + UseLeft = true; + else + UseRight = true; + } + + DEBUG_WITH_TYPE("isel", { + dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft=" + << UseLeft << " UseRight=" << UseRight << " HavePairs=" + << HavePairs << '\n'; + }); + // If the mask is all -1's, generate "undef". + if (!UseLeft && !UseRight) { + ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode()); + DAG.RemoveDeadNode(N); + return; + } + + SDValue Vec0 = N->getOperand(0); + SDValue Vec1 = N->getOperand(1); + ResultStack Results(SN); + Results.push(TargetOpcode::COPY, ResTy, {Vec0}); + Results.push(TargetOpcode::COPY, ResTy, {Vec1}); + OpRef Va = OpRef::res(Results.top()-1); + OpRef Vb = OpRef::res(Results.top()); + + OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results) + : shuffp2(ShuffleMask(Mask), Va, Vb, Results); + + bool Done = Res.isValid(); + if (Done) + materialize(Results); + else + Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N); + + if (!Done) { +#ifndef NDEBUG + dbgs() << "Unhandled shuffle:\n"; + SN->dumpr(&DAG); +#endif + llvm_unreachable("Failed to select vector shuffle"); + } +} + +void HvxSelector::selectRor(SDNode *N) { + // If this is a rotation by less than 8, use V6_valignbi. 
+ MVT Ty = N->getValueType(0).getSimpleVT(); + const SDLoc &dl(N); + SDValue VecV = N->getOperand(0); + SDValue RotV = N->getOperand(1); + SDNode *NewN = nullptr; + + if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) { + unsigned S = CN->getZExtValue(); + if (S % HST.getVectorLength() == 0) { + NewN = VecV.getNode(); + } else if (isUInt<3>(S)) { + SDValue C = DAG.getTargetConstant(S, dl, MVT::i32); + NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty, + {VecV, VecV, C}); + } + } + + if (!NewN) + NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV}); + + ISel.ReplaceNode(N, NewN); + DAG.RemoveDeadNode(N); +} + +void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) { + HvxSelector(*this, *CurDAG).selectShuffle(N); +} + +void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) { + HvxSelector(*this, *CurDAG).selectRor(N); +} + diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 22bbb3e94df..859f6976c9b 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -129,6 +129,19 @@ namespace { // Implement calling convention for Hexagon. 
+static const MVT LegalV64[] = { + MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 +}; +static const MVT LegalW64[] = { + MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64 +}; +static const MVT LegalV128[] = { + MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64 +}; +static const MVT LegalW128[] = { + MVT::v256i8, MVT::v128i16, MVT::v64i32, MVT::v32i64 +}; + static bool CC_Hexagon(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, @@ -1978,36 +1991,52 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); + auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) { + setOperationAction(Opc, FromTy, Promote); + AddPromotedToType(Opc, FromTy, ToTy); + }; + if (Subtarget.useHVXOps()) { - if (Subtarget.useHVX64BOps()) { - setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom); - // We try to generate the vpack{e/o} instructions. If we fail - // we fall back upon ExpandOp. - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i8, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom); - } else if (Subtarget.useHVX128BOps()) { - setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom); - // We try to generate the vpack{e/o} instructions. If we fail - // we fall back upon ExpandOp. 
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v128i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v128i8, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom); - } else { - llvm_unreachable("Unrecognized HVX mode"); + bool Use64b = Subtarget.useHVX64BOps(); + ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128; + ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128; + MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; + MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8; + + setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); + setOperationAction(ISD::CONCAT_VECTORS, ByteW, Legal); + setOperationAction(ISD::OR, ByteV, Legal); + + for (MVT T : LegalV) { + setIndexedLoadAction(ISD::POST_INC, T, Legal); + setIndexedStoreAction(ISD::POST_INC, T, Legal); + + setOperationAction(ISD::BUILD_VECTOR, T, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); + } + + for (MVT T : LegalV) { + if (T == ByteV) + continue; + // Promote all shuffles and concats to operate on vectors of bytes. + setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); + setPromoteTo(ISD::CONCAT_VECTORS, T, ByteV); + setPromoteTo(ISD::OR, T, ByteV); + } + + for (MVT T : LegalW) { + if (T == ByteW) + continue; + // Promote all shuffles and concats to operate on vectors of bytes. + setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); + setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW); } } + // Subtarget-specific operation actions. 
// if (Subtarget.hasV5TOps()) { @@ -2069,20 +2098,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setIndexedStoreAction(ISD::POST_INC, VT, Legal); } - if (Subtarget.useHVX64BOps()) { - for (MVT VT : {MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64, - MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) { - setIndexedLoadAction(ISD::POST_INC, VT, Legal); - setIndexedStoreAction(ISD::POST_INC, VT, Legal); - } - } else if (Subtarget.useHVX128BOps()) { - for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64, - MVT::v256i8, MVT::v128i16, MVT::v64i32, MVT::v32i64}) { - setIndexedLoadAction(ISD::POST_INC, VT, Legal); - setIndexedStoreAction(ISD::POST_INC, VT, Legal); - } - } - computeRegisterProperties(&HRI); // @@ -2225,6 +2240,9 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::VASR: return "HexagonISD::VASR"; case HexagonISD::VLSR: return "HexagonISD::VLSR"; case HexagonISD::VSPLAT: return "HexagonISD::VSPLAT"; + case HexagonISD::VEXTRACTW: return "HexagonISD::VEXTRACTW"; + case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0"; + case HexagonISD::VROR: return "HexagonISD::VROR"; case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE"; case HexagonISD::OP_END: break; } @@ -2252,43 +2270,11 @@ bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { // Should we expand the build vector with shuffles? 
bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const { - // Hexagon vector shuffle operates on element sizes of bytes or halfwords - EVT EltVT = VT.getVectorElementType(); - int EltBits = EltVT.getSizeInBits(); - if ((EltBits != 8) && (EltBits != 16)) - return false; - - return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); -} - -static StridedLoadKind isStridedLoad(const ArrayRef<int> &Mask) { - int even_start = -2; - int odd_start = -1; - size_t mask_len = Mask.size(); - for (auto idx : Mask) { - if ((idx - even_start) == 2) - even_start = idx; - else - break; - } - if (even_start == (int)(mask_len * 2) - 2) - return StridedLoadKind::Even; - for (auto idx : Mask) { - if ((idx - odd_start) == 2) - odd_start = idx; - else - break; - } - if (odd_start == (int)(mask_len * 2) - 1) - return StridedLoadKind::Odd; - - return StridedLoadKind::NoPattern; + return false; } bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const { - if (Subtarget.useHVXOps()) - return isStridedLoad(Mask) != StridedLoadKind::NoPattern; return true; } @@ -2302,7 +2288,6 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) SDValue V2 = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); - bool UseHVX = Subtarget.useHVXOps(); if (V2.isUndef()) V2 = V1; @@ -2334,27 +2319,6 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) DAG.getConstant(Lane, dl, MVT::i32)); } - if (UseHVX) { - ArrayRef<int> Mask = SVN->getMask(); - size_t MaskLen = Mask.size(); - unsigned SizeInBits = VT.getScalarSizeInBits() * MaskLen; - - if ((Subtarget.useHVX64BOps() && SizeInBits == 64 * 8) || - (Subtarget.useHVX128BOps() && SizeInBits == 128 * 8)) { - StridedLoadKind Pattern = isStridedLoad(Mask); - if (Pattern == StridedLoadKind::NoPattern) - return SDValue(); - - unsigned Opc = Pattern == StridedLoadKind::Even ? 
HexagonISD::VPACKE - : HexagonISD::VPACKO; - return DAG.getNode(Opc, dl, VT, {Op.getOperand(1), Op.getOperand(0)}); - } - // We used to assert in the "else" part here, but that is bad for Halide - // Halide creates intermediate double registers by interleaving two - // concatenated vector registers. The interleaving requires vector_shuffle - // nodes and we shouldn't barf on a double register result of a - // vector_shuffle because it is most likely an intermediate result. - } // FIXME: We need to support more general vector shuffles. See // below the comment from the ARM backend that deals in the general // case with the vector shuffles. For now, let expand handle these. @@ -2445,7 +2409,7 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, SmallVector<ConstantSDNode*,4> Consts; bool AllConst = true; for (SDValue V : Elem) { - if (V.getOpcode() == ISD::UNDEF) + if (isUndef(V)) V = DAG.getConstant(0, dl, ElemTy); auto *C = dyn_cast<ConstantSDNode>(V.getNode()); Consts.push_back(C); @@ -2454,7 +2418,7 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, unsigned First, Num = Elem.size(); for (First = 0; First != Num; ++First) - if (Elem[First].getOpcode() != ISD::UNDEF) + if (!isUndef(Elem[First])) break; if (First == Num) return DAG.getUNDEF(VecTy); @@ -2466,9 +2430,9 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, Consts[1]->getZExtValue() << 16; return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32)); } - SDNode *N = DAG.getMachineNode(Hexagon::A2_combine_ll, dl, MVT::i32, - { Elem[1], Elem[0] }); - return DAG.getBitcast(MVT::v2i16, SDValue(N,0)); + SDValue N = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, + {Elem[1], Elem[0]}, DAG); + return DAG.getBitcast(MVT::v2i16, N); } // First try generating a constant. 
@@ -2486,7 +2450,7 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, for (unsigned i = 0; i != Num; ++i) { if (i == First) continue; - if (Elem[i] == Elem[First] || Elem[i].getOpcode() == ISD::UNDEF) + if (Elem[i] == Elem[First] || isUndef(Elem[i])) continue; IsSplat = false; break; @@ -2507,9 +2471,9 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, SDValue V5 = DAG.getNode(ISD::SHL, dl, MVT::i32, {V3, S8}); SDValue V6 = DAG.getNode(ISD::OR, dl, MVT::i32, {V0, V4}); SDValue V7 = DAG.getNode(ISD::OR, dl, MVT::i32, {V2, V5}); - SDNode *T0 = DAG.getMachineNode(Hexagon::A2_combine_ll, dl, MVT::i32, - {V7, V6}); - return DAG.getBitcast(MVT::v4i8, SDValue(T0,0)); + + SDValue T0 = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, {V7, V6}, DAG); + return DAG.getBitcast(MVT::v4i8, T0); } SDValue @@ -2521,7 +2485,7 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, SmallVector<ConstantSDNode*,8> Consts; bool AllConst = true; for (SDValue V : Elem) { - if (V.getOpcode() == ISD::UNDEF) + if (isUndef(V)) V = DAG.getConstant(0, dl, ElemTy); auto *C = dyn_cast<ConstantSDNode>(V.getNode()); Consts.push_back(C); @@ -2530,7 +2494,7 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, unsigned First, Num = Elem.size(); for (First = 0; First != Num; ++First) - if (Elem[First].getOpcode() != ISD::UNDEF) + if (!isUndef(Elem[First])) break; if (First == Num) return DAG.getUNDEF(VecTy); @@ -2541,7 +2505,7 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, for (unsigned i = 0; i != Num; ++i) { if (i == First) continue; - if (Elem[i] == Elem[First] || Elem[i].getOpcode() == ISD::UNDEF) + if (Elem[i] == Elem[First] || isUndef(Elem[i])) continue; IsSplat = false; break; @@ -2570,12 +2534,7 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, SDValue H = (ElemTy == MVT::i32) ? 
Elem[1] : buildVector32({Elem.data()+Num/2, Num/2}, dl, HalfTy, DAG); - unsigned Id = Hexagon::DoubleRegsRegClassID; - SDNode *N = DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VecTy, - { DAG.getTargetConstant(Id, dl, MVT::i32), - L, DAG.getTargetConstant(Hexagon::isub_lo, dl, MVT::i32), - H, DAG.getTargetConstant(Hexagon::isub_hi, dl, MVT::i32) }); - return SDValue(N, 0); + return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L}); } SDValue @@ -2675,120 +2634,33 @@ HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, SDValue HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { - MVT VT = Op.getValueType().getSimpleVT(); - unsigned BW = VT.getSizeInBits(); + MVT VecTy = ty(Op); + unsigned BW = VecTy.getSizeInBits(); if (BW == 32 || BW == 64) { SmallVector<SDValue,8> Ops; for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) Ops.push_back(Op.getOperand(i)); if (BW == 32) - return buildVector32(Ops, SDLoc(Op), VT, DAG); - return buildVector64(Ops, SDLoc(Op), VT, DAG); + return buildVector32(Ops, SDLoc(Op), VecTy, DAG); + return buildVector64(Ops, SDLoc(Op), VecTy, DAG); } + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy)) + return LowerHvxBuildVector(Op, DAG); + return SDValue(); } SDValue HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - bool UseHVX = Subtarget.useHVXOps(); - EVT VT = Op.getValueType(); - unsigned NElts = Op.getNumOperands(); - SDValue Vec0 = Op.getOperand(0); - EVT VecVT = Vec0.getValueType(); - unsigned Width = VecVT.getSizeInBits(); - - if (NElts == 2) { - MVT ST = VecVT.getSimpleVT(); - // We are trying to concat two v2i16 to a single v4i16, or two v4i8 - // into a single v8i8. 
- if (ST == MVT::v2i16 || ST == MVT::v4i8) - return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0); - - if (UseHVX) { - assert((Width == 64 * 8 && Subtarget.useHVX64BOps()) || - (Width == 128 * 8 && Subtarget.useHVX128BOps())); - SDValue Vec1 = Op.getOperand(1); - MVT OpTy = Subtarget.useHVX64BOps() ? MVT::v16i32 : MVT::v32i32; - MVT ReTy = Subtarget.useHVX64BOps() ? MVT::v32i32 : MVT::v64i32; - SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0); - SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1); - SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0); - return DAG.getNode(ISD::BITCAST, dl, VT, VC); - } - } - - if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64) - return SDValue(); - - SDValue C0 = DAG.getConstant(0, dl, MVT::i64); - SDValue C32 = DAG.getConstant(32, dl, MVT::i64); - SDValue W = DAG.getConstant(Width, dl, MVT::i64); - // Create the "width" part of the argument to insert_rp/insertp_rp. - SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32); - SDValue V = C0; - - for (unsigned i = 0, e = NElts; i != e; ++i) { - unsigned N = NElts-i-1; - SDValue OpN = Op.getOperand(N); - - if (VT.getSizeInBits() == 64 && OpN.getValueSizeInBits() == 32) { - SDValue C = DAG.getConstant(0, dl, MVT::i32); - OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN); - } - SDValue Idx = DAG.getConstant(N, dl, MVT::i64); - SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W); - SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset); - if (VT.getSizeInBits() == 32) - V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or}); - else if (VT.getSizeInBits() == 64) - V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or}); - else - return SDValue(); - } - - return DAG.getNode(ISD::BITCAST, dl, VT, V); -} - -SDValue -HexagonTargetLowering::LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getOperand(0).getValueType(); - SDLoc dl(Op); - bool UseHVX = Subtarget.useHVXOps(); - 
bool UseHVX64B = Subtarget.useHVX64BOps(); - // Just in case... - - if (!VT.isVector() || !UseHVX) - return SDValue(); - - EVT ResVT = Op.getValueType(); - unsigned ResSize = ResVT.getSizeInBits(); - unsigned VectorSizeInBits = UseHVX64B ? (64 * 8) : (128 * 8); - unsigned OpSize = VT.getSizeInBits(); - - // We deal only with cases where the result is the vector size - // and the vector operand is a double register. - if (!(ResVT.isByteSized() && ResSize == VectorSizeInBits) || - !(VT.isByteSized() && OpSize == 2 * VectorSizeInBits)) - return SDValue(); - - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1)); - if (!Cst) - return SDValue(); - unsigned Val = Cst->getZExtValue(); - - // These two will get lowered to an appropriate EXTRACT_SUBREG in ISel. - if (Val == 0) { - SDValue Vec = Op.getOperand(0); - return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResVT, Vec); - } + MVT VecTy = ty(Op); + assert(!Subtarget.useHVXOps() || !Subtarget.isHVXVectorType(VecTy)); - if (ResVT.getVectorNumElements() == Val) { - SDValue Vec = Op.getOperand(0); - return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResVT, Vec); + if (VecTy.getSizeInBits() == 64) { + assert(Op.getNumOperands() == 2); + return DAG.getNode(HexagonISD::COMBINE, SDLoc(Op), VecTy, Op.getOperand(1), + Op.getOperand(0)); } return SDValue(); @@ -2798,6 +2670,10 @@ SDValue HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue Vec = Op.getOperand(0); + MVT VecTy = ty(Vec); + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy)) + return LowerHvxExtractElement(Op, DAG); + MVT ElemTy = ty(Vec).getVectorElementType(); return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG); } @@ -2808,7 +2684,7 @@ HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SDValue Vec = Op.getOperand(0); MVT VecTy = ty(Vec); if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy)) - return LowerEXTRACT_SUBVECTOR_HVX(Op, DAG); + return 
LowerHvxExtractSubvector(Op, DAG); return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ty(Op), ty(Op), DAG); } @@ -2817,6 +2693,9 @@ SDValue HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { MVT VecTy = ty(Op); + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy)) + return LowerHvxInsertElement(Op, DAG); + return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2), SDLoc(Op), VecTy.getVectorElementType(), DAG); } @@ -2824,6 +2703,9 @@ HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SDValue HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(Op))) + return LowerHvxInsertSubvector(Op, DAG); + SDValue ValV = Op.getOperand(1); return insertVector(Op.getOperand(0), ValV, Op.getOperand(2), SDLoc(Op), ty(ValV), DAG); @@ -2911,6 +2793,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::PREFETCH: return LowerPREFETCH(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG); } + return SDValue(); } /// Returns relocation base for the given PIC jumptable. diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 9f7891e1746..17310914572 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -63,6 +63,9 @@ namespace HexagonISD { VCOMBINE, VPACKE, VPACKO, + VEXTRACTW, + VINSERTW0, + VROR, TC_RETURN, EH_RETURN, DCFETCH, @@ -88,6 +91,8 @@ namespace HexagonISD { explicit HexagonTargetLowering(const TargetMachine &TM, const HexagonSubtarget &ST); + bool isHVXVectorType(MVT Ty) const; + /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. 
@@ -121,9 +126,8 @@ namespace HexagonISD { SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const; @@ -281,7 +285,24 @@ namespace HexagonISD { return Ty; return MVT::getIntegerVT(Ty.getSizeInBits()); } + MVT tyVector(MVT Ty, MVT ElemTy) const { + if (Ty.isVector() && Ty.getVectorElementType() == ElemTy) + return Ty; + unsigned TyWidth = Ty.getSizeInBits(), ElemWidth = ElemTy.getSizeInBits(); + assert((TyWidth % ElemWidth) == 0); + return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth); + } + bool isUndef(SDValue Op) const { + if (Op.isMachineOpcode()) + return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; + return Op.getOpcode() == ISD::UNDEF; + } + SDValue getNode(unsigned MachineOpc, const SDLoc &dl, MVT Ty, + ArrayRef<SDValue> Ops, SelectionDAG &DAG) const { + SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops); + return SDValue(N, 0); + } SDValue buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const; SDValue buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy, @@ -291,6 +312,38 @@ namespace HexagonISD { SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const; + using VectorPair = std::pair<SDValue, SDValue>; + using TypePair = std::pair<MVT, MVT>; + + SDValue getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops, + const SDLoc &dl, SelectionDAG &DAG) const; + + TypePair ty(const VectorPair 
&Ops) const { + return { Ops.first.getValueType().getSimpleVT(), + Ops.second.getValueType().getSimpleVT() }; + } + + MVT typeJoin(const TypePair &Tys) const; + TypePair typeSplit(MVT Ty) const; + MVT typeCastElem(MVT VecTy, MVT ElemTy) const; + MVT typeExtElem(MVT VecTy, unsigned Factor) const; + MVT typeTruncElem(MVT VecTy, unsigned Factor) const; + + SDValue opJoin(const VectorPair &Ops, const SDLoc &dl, + SelectionDAG &DAG) const; + VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const; + SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const; + + SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy, + SelectionDAG &DAG) const; + SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const; + + SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const; + std::pair<const TargetRegisterClass*, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp new file mode 100644 index 00000000000..3a9e50826a0 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -0,0 +1,299 @@ +//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonISelLowering.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" + +using namespace llvm; + +SDValue +HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops, + const SDLoc &dl, SelectionDAG &DAG) const { + SmallVector<SDValue,4> IntOps; + IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32)); + for (const SDValue &Op : Ops) + IntOps.push_back(Op); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps); +} + +MVT +HexagonTargetLowering::typeJoin(const TypePair &Tys) const { + assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType()); + + MVT ElemTy = Tys.first.getVectorElementType(); + return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() + + Tys.second.getVectorNumElements()); +} + +HexagonTargetLowering::TypePair +HexagonTargetLowering::typeSplit(MVT VecTy) const { + assert(VecTy.isVector()); + unsigned NumElem = VecTy.getVectorNumElements(); + assert((NumElem % 2) == 0 && "Expecting even-sized vector type"); + MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2); + return { HalfTy, HalfTy }; +} + +MVT +HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const { + MVT ElemTy = VecTy.getVectorElementType(); + MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor); + return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements()); +} + +MVT +HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const { + MVT ElemTy = VecTy.getVectorElementType(); + MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor); + return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements()); +} + +SDValue +HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy, + SelectionDAG &DAG) const { + if (ty(Vec).getVectorElementType() == ElemTy) + return Vec; + MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy); + return 
DAG.getBitcast(CastTy, Vec); +} + +SDValue +HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl, + SelectionDAG &DAG) const { + return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)), + Ops.second, Ops.first); +} + +HexagonTargetLowering::VectorPair +HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl, + SelectionDAG &DAG) const { + TypePair Tys = typeSplit(ty(Vec)); + return DAG.SplitVector(Vec, dl, Tys.first, Tys.second); +} + +SDValue +HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy, + SelectionDAG &DAG) const { + if (ElemIdx.getValueType().getSimpleVT() != MVT::i32) + ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx); + + unsigned ElemWidth = ElemTy.getSizeInBits(); + if (ElemWidth == 8) + return ElemIdx; + + unsigned L = Log2_32(ElemWidth/8); + const SDLoc &dl(ElemIdx); + return DAG.getNode(ISD::SHL, dl, MVT::i32, + {ElemIdx, DAG.getConstant(L, dl, MVT::i32)}); +} + +SDValue +HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy, + SelectionDAG &DAG) const { + unsigned ElemWidth = ElemTy.getSizeInBits(); + assert(ElemWidth >= 8 && ElemWidth <= 32); + if (ElemWidth == 32) + return Idx; + + if (ty(Idx) != MVT::i32) + Idx = DAG.getBitcast(MVT::i32, Idx); + const SDLoc &dl(Idx); + SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32); + SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask}); + return SubIdx; +} + +SDValue +HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) + const { + const SDLoc &dl(Op); + BuildVectorSDNode *BN = cast<BuildVectorSDNode>(Op.getNode()); + bool IsConst = BN->isConstant(); + MachineFunction &MF = DAG.getMachineFunction(); + MVT VecTy = ty(Op); + + if (IsConst) { + SmallVector<Constant*, 128> Elems; + for (SDValue V : BN->op_values()) { + if (auto *C = dyn_cast<ConstantSDNode>(V.getNode())) + Elems.push_back(const_cast<ConstantInt*>(C->getConstantIntValue())); + } + Constant *CV = ConstantVector::get(Elems); + unsigned Align = 
VecTy.getSizeInBits() / 8; + SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG); + return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP, + MachinePointerInfo::getConstantPool(MF), Align); + } + + unsigned NumOps = Op.getNumOperands(); + unsigned HwLen = Subtarget.getVectorLength(); + unsigned ElemSize = VecTy.getVectorElementType().getSizeInBits() / 8; + assert(ElemSize*NumOps == HwLen); + + SmallVector<SDValue,32> Words; + SmallVector<SDValue,32> Ops; + for (unsigned i = 0; i != NumOps; ++i) + Ops.push_back(Op.getOperand(i)); + + if (VecTy.getVectorElementType() != MVT::i32) { + assert(ElemSize < 4 && "vNi64 should have been promoted to vNi32"); + assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size"); + unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2; + MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord); + for (unsigned i = 0; i != NumOps; i += OpsPerWord) { + SDValue W = buildVector32({&Ops[i], OpsPerWord}, dl, PartVT, DAG); + Words.push_back(DAG.getBitcast(MVT::i32, W)); + } + } else { + Words.assign(Ops.begin(), Ops.end()); + } + + // Construct two halves in parallel, then or them together. 
+ assert(4*Words.size() == Subtarget.getVectorLength()); + SDValue HalfV0 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG); + SDValue HalfV1 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG); + SDValue S = DAG.getConstant(4, dl, MVT::i32); + unsigned NumWords = Words.size(); + for (unsigned i = 0; i != NumWords/2; ++i) { + SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, + {HalfV0, Words[i]}); + SDValue M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, + {HalfV1, Words[i+NumWords/2]}); + HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, S}); + HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, S}); + } + + HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, + {HalfV0, DAG.getConstant(HwLen/2, dl, MVT::i32)}); + SDValue DstV = DAG.getNode(ISD::OR, dl, VecTy, {HalfV0, HalfV1}); + return DstV; +} + +SDValue +HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) + const { + // Change the type of the extracted element to i32. + SDValue VecV = Op.getOperand(0); + MVT ElemTy = ty(VecV).getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + assert(ElemWidth >= 8 && ElemWidth <= 32); // TODO i64 + + const SDLoc &dl(Op); + SDValue IdxV = Op.getOperand(1); + if (ty(IdxV) != MVT::i32) + IdxV = DAG.getBitcast(MVT::i32, IdxV); + + SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); + SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, + {VecV, ByteIdx}); + if (ElemTy == MVT::i32) + return ExWord; + + // Have an extracted word, need to extract the smaller element out of it. + // 1. Extract the bits of (the original) IdxV that correspond to the index + // of the desired element in the 32-bit word. + SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG); + // 2. Extract the element from the word. 
+ SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord); + return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG); +} + +SDValue +HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) + const { + const SDLoc &dl(Op); + SDValue VecV = Op.getOperand(0); + SDValue ValV = Op.getOperand(1); + SDValue IdxV = Op.getOperand(2); + MVT ElemTy = ty(VecV).getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + assert(ElemWidth >= 8 && ElemWidth <= 32); // TODO i64 + + auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV, + SDValue ByteIdxV) { + MVT VecTy = ty(VecV); + unsigned HwLen = Subtarget.getVectorLength(); + SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32, + {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)}); + SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV}); + SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV}); + SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32, + {DAG.getConstant(HwLen/4, dl, MVT::i32), MaskV}); + SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV}); + return TorV; + }; + + SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); + if (ElemTy == MVT::i32) + return InsertWord(VecV, ValV, ByteIdx); + + // If this is not inserting a 32-bit word, convert it into such a thing. + // 1. Extract the existing word from the target vector. + SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32, + {ByteIdx, DAG.getConstant(2, dl, MVT::i32)}); + SDValue Ex0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + {opCastElem(VecV, MVT::i32, DAG), WordIdx}); + SDValue Ext = LowerHvxExtractElement(Ex0, DAG); + + // 2. Treating the extracted word as a 32-bit vector, insert the given + // value into it. + SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG); + MVT SubVecTy = tyVector(ty(Ext), ElemTy); + SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext), + ValV, SubIdx, dl, SubVecTy, DAG); + + // 3. 
Insert the 32-bit word back into the original vector. + return InsertWord(VecV, Ins, ByteIdx); +} + +SDValue +HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) + const { + SDValue SrcV = Op.getOperand(0); + MVT SrcTy = ty(SrcV); + unsigned SrcElems = SrcTy.getVectorNumElements(); + SDValue IdxV = Op.getOperand(1); + unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); + MVT DstTy = ty(Op); + assert(Idx == 0 || DstTy.getVectorNumElements() % Idx == 0); + const SDLoc &dl(Op); + if (Idx == 0) + return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, DstTy, SrcV); + if (Idx == SrcElems/2) + return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, DstTy, SrcV); + return SDValue(); +} + +SDValue +HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) + const { + // Idx may be variable + SDValue IdxV = Op.getOperand(2); + auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode()); + if (!IdxN) + return SDValue(); + unsigned Idx = IdxN->getZExtValue(); + + SDValue DstV = Op.getOperand(0); + SDValue SrcV = Op.getOperand(1); + MVT DstTy = ty(DstV); + MVT SrcTy = ty(SrcV); + unsigned DstElems = DstTy.getVectorNumElements(); + unsigned SrcElems = SrcTy.getVectorNumElements(); + if (2*SrcElems != DstElems) + return SDValue(); + + const SDLoc &dl(Op); + if (Idx == 0) + return DAG.getTargetInsertSubreg(Hexagon::vsub_lo, dl, DstTy, DstV, SrcV); + if (Idx == SrcElems) + return DAG.getTargetInsertSubreg(Hexagon::vsub_hi, dl, DstTy, DstV, SrcV); + return SDValue(); +} diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 270575aa09a..f1d01b0cee2 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -19,10 +19,10 @@ // (8) Shift/permute // (9) Arithmetic/bitwise // (10) Bit -// (11) Load -// (12) Store -// (13) Memop -// (14) PIC +// (11) PIC +// (12) Load +// (13) Store +// (14) Memop // (15) Call // (16) Branch // 
(17) Misc @@ -340,6 +340,8 @@ def: Pat<(HexagonCONST32_GP tblockaddress:$A), (A2_tfrsi imm:$A)>; def: Pat<(HexagonCONST32_GP tglobaladdr:$A), (A2_tfrsi imm:$A)>; def: Pat<(HexagonJT tjumptable:$A), (A2_tfrsi imm:$A)>; def: Pat<(HexagonCP tconstpool:$A), (A2_tfrsi imm:$A)>; +// The HVX load patterns also match CP directly. Make sure that if +// the selection of this opcode changes, it's updated in all places. def: Pat<(i1 0), (PS_false)>; def: Pat<(i1 1), (PS_true)>; @@ -1630,7 +1632,31 @@ let AddedComplexity = 100 in { (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>; } -// --(11) Load ----------------------------------------------------------- +// --(11) PIC ------------------------------------------------------------ +// + +def SDT_HexagonAtGot + : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_HexagonAtPcrel + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// AT_GOT address-of-GOT, address-of-global, offset-in-global +def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; +// AT_PCREL address-of-global +def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; + +def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), + (L2_loadri_io I32:$got, imm:$addr)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), + (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; +def: Pat<(HexagonAtPcrel I32:$addr), + (C4_addipc imm:$addr)>; + +// The HVX load patterns also match AT_PCREL directly. Make sure that +// if the selection of this opcode changes, it's updated in all places. 
+ + +// --(12) Load ----------------------------------------------------------- // def extloadv2i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{ @@ -1971,6 +1997,12 @@ multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType VT, PatFrag ImmPred> { def: Pat<(VT (Load I32:$Rt)), (MI I32:$Rt, 0)>; def: Pat<(VT (Load (add I32:$Rt, ImmPred:$s))), (MI I32:$Rt, imm:$s)>; + // The HVX selection code for shuffles can generate vector constants. + // Calling "Select" on the resulting loads from CP fails without these + // patterns. + def: Pat<(VT (Load (HexagonCP tconstpool:$A))), (MI (A2_tfrsi imm:$A), 0)>; + def: Pat<(VT (Load (HexagonAtPcrel tconstpool:$A))), + (MI (C4_addipc imm:$A), 0)>; } @@ -1997,7 +2029,7 @@ let Predicates = [UseHVX] in { } -// --(12) Store ---------------------------------------------------------- +// --(13) Store ---------------------------------------------------------- // @@ -2466,7 +2498,7 @@ let Predicates = [UseHVX] in { } -// --(13) Memop ---------------------------------------------------------- +// --(14) Memop ---------------------------------------------------------- // def m5_0Imm8Pred : PatLeaf<(i32 imm), [{ @@ -2744,27 +2776,6 @@ let AddedComplexity = 220 in { } -// --(14) PIC ------------------------------------------------------------ -// - -def SDT_HexagonAtGot - : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; -def SDT_HexagonAtPcrel - : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - -// AT_GOT address-of-GOT, address-of-global, offset-in-global -def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; -// AT_PCREL address-of-global -def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; - -def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), - (L2_loadri_io I32:$got, imm:$addr)>; -def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), - (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; -def: Pat<(HexagonAtPcrel I32:$addr), - (C4_addipc 
imm:$addr)>; - - // --(15) Call ----------------------------------------------------------- // @@ -2894,3 +2905,32 @@ def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, [SDNPHasChain]>; def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>; + +def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>; +def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>; + +def SDTHexagonVINSERTW0: SDTypeProfile<1, 2, + [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; +def HexagonVINSERTW0 : SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>; + +let Predicates = [UseHVX] in { + def: Pat<(concat_vectors HVI8:$Vs, HVI8:$Vt), + (V6_vcombine HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(or HVI8:$Vs, HVI8:$Vt), + (V6_vor HvxVR:$Vt, HvxVR:$Vs)>; + + def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs), + (V6_extractw HvxVR:$Vu, I32:$Rs)>; + def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs), + (V6_extractw HvxVR:$Vu, I32:$Rs)>; + def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs), + (V6_extractw HvxVR:$Vu, I32:$Rs)>; + + def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt), + (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; + def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt), + (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; + def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt), + (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; +} |

