diff options
| author | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2016-07-29 16:44:27 +0000 |
|---|---|---|
| committer | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2016-07-29 16:44:27 +0000 |
| commit | 0bd55a7608098ebacd4f8bd4a83a15bc1b3e7206 (patch) | |
| tree | 7556e05822bd5220fdbac8db5f28fa25d145f5bf /llvm/lib | |
| parent | 6731dead227b7a8783a67c40ba05a04288cc364b (diff) | |
| download | bcm5719-llvm-0bd55a7608098ebacd4f8bd4a83a15bc1b3e7206.tar.gz bcm5719-llvm-0bd55a7608098ebacd4f8bd4a83a15bc1b3e7206.zip | |
[Hexagon] Custom lower VECTOR_SHUFFLE and EXTRACT_SUBVECTOR for HVX
If the mask of a vector shuffle has alternating odd or even numbers
starting with 1 or 0 respectively up to the largest possible index
for the given type in the given HVX mode (single of double) we can
generate vpacko or vpacke instruction respectively.
E.g.
%42 = shufflevector <32 x i16> %37, <32 x i16> %41,
<32 x i32> <i32 1, i32 3, ..., i32 63>
is %42.h = vpacko(%41.w, %37.w)
Patch by Pranav Bhandarkar.
llvm-svn: 277168
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 177 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLowering.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td | 40 |
3 files changed, 199 insertions, 22 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 36ce6085662..cb106c83ec1 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -83,18 +83,24 @@ static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", namespace { -class HexagonCCState : public CCState { - unsigned NumNamedVarArgParams; + class HexagonCCState : public CCState { + unsigned NumNamedVarArgParams; -public: - HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, - int NumNamedVarArgParams) - : CCState(CC, isVarArg, MF, locs, C), - NumNamedVarArgParams(NumNamedVarArgParams) {} + public: + HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, + SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, + int NumNamedVarArgParams) + : CCState(CC, isVarArg, MF, locs, C), + NumNamedVarArgParams(NumNamedVarArgParams) {} - unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; } -}; + unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; } + }; + + enum StridedLoadKind { + Even = 0, + Odd, + NoPattern + }; } // Implement calling convention for Hexagon. @@ -1992,11 +1998,26 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom); + // We try to generate the vpack{e/o} instructions. If we fail + // we fall back upon ExpandOp. + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i8, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i16, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom); } else if (UseHVXDbl) { setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom); + // We try to generate the vpack{e/o} instructions. If we fail + // we fall back upon ExpandOp. + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v128i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i16, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v128i8, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i16, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom); } else { llvm_unreachable("Unrecognized HVX mode"); } @@ -2228,6 +2249,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; + case HexagonISD::VPACK: return "HexagonISD::VPACK"; case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; @@ -2257,7 +2279,6 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32); } -// shouldExpandBuildVectorWithShuffles // Should we expand the build vector with shuffles? bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, @@ -2272,14 +2293,41 @@ HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); } -// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and -// V2 are the two vectors to select data from, V3 is the permutation. -static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { +static StridedLoadKind isStridedLoad(ArrayRef<int> &Mask) { + int even_start = -2; + int odd_start = -1; + size_t mask_len = Mask.size(); + for (auto idx : Mask) { + if ((idx - even_start) == 2) + even_start = idx; + else + break; + } + if (even_start == (int)(mask_len * 2) - 2) + return StridedLoadKind::Even; + for (auto idx : Mask) { + if ((idx - odd_start) == 2) + odd_start = idx; + else + break; + } + if (odd_start == (int)(mask_len * 2) - 1) + return StridedLoadKind::Odd; + + return StridedLoadKind::NoPattern; +} + +// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors +// to select data from, V3 is the permutation. +SDValue +HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) + const { const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); + bool UseHVX = Subtarget.useHVXOps(); if (V2.isUndef()) V2 = V1; @@ -2309,6 +2357,30 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32)); } + if (UseHVX) { + ArrayRef<int> Mask = SVN->getMask(); + size_t MaskLen = Mask.size(); + int ElemSizeInBits = VT.getVectorElementType().getSizeInBits(); + if ((Subtarget.useHVXSglOps() && (ElemSizeInBits * MaskLen) == 64 * 8) || + (Subtarget.useHVXDblOps() && (ElemSizeInBits * MaskLen) == 128 * 8)) { + // Return 1 for odd and 2 of even + StridedLoadKind Pattern = isStridedLoad(Mask); + + if (Pattern == StridedLoadKind::NoPattern) + return SDValue(); + + SDValue Vec0 = Op.getOperand(0); + SDValue Vec1 = Op.getOperand(1); + SDValue StridePattern = DAG.getConstant(Pattern, dl, MVT::i32); + SDValue Ops[] = { Vec1, Vec0, StridePattern }; + return DAG.getNode(HexagonISD::VPACK, dl, VT, Ops); + } + // We used to assert in the "else" part here, but that is bad for Halide + // Halide creates intermediate double registers by interleaving two + // concatenated vector registers. The interleaving requires vector_shuffle + // nodes and we shouldn't barf on a double register result of a + // vector_shuffle because it is most likely an intermediate result. + } // FIXME: We need to support more general vector shuffles. See // below the comment from the ARM backend that deals in the general // case with the vector shuffles. For now, let expand handle these. @@ -2331,10 +2403,11 @@ static bool isCommonSplatElement(BuildVectorSDNode *BVN) { return true; } -// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert +// Lower a vector shift. Try to convert // <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific // <VT> = SHL/SRA/SRL <VT> by <IT/i32>. -static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) { +SDValue +HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { BuildVectorSDNode *BVN = 0; SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -2484,6 +2557,9 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { Res = (Res << EltSize) | Val; } + if (Size > 64) + return SDValue(); + if (Size == 64) ConstVal = DAG.getConstant(Res, dl, MVT::i64); else @@ -2581,16 +2657,68 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset); if (VT.getSizeInBits() == 32) V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or}); - else + else if (VT.getSizeInBits() == 64) V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or}); + else + return SDValue(); } return DAG.getNode(ISD::BITCAST, dl, VT, V); } SDValue +HexagonTargetLowering::LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getOperand(0).getValueType(); + SDLoc dl(Op); + bool UseHVX = Subtarget.useHVXOps(); + bool UseHVXSgl = Subtarget.useHVXSglOps(); + // Just in case... + + if (!VT.isVector() || !UseHVX) + return SDValue(); + + EVT ResVT = Op.getValueType(); + unsigned ResSize = ResVT.getSizeInBits(); + unsigned VectorSizeInBits = UseHVXSgl ? (64 * 8) : (128 * 8); + unsigned OpSize = VT.getSizeInBits(); + + // We deal only with cases where the result is the vector size + // and the vector operand is a double register. + if (!(ResVT.isByteSized() && ResSize == VectorSizeInBits) || + !(VT.isByteSized() && OpSize == 2 * VectorSizeInBits)) + return SDValue(); + + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!Cst) + return SDValue(); + unsigned Val = Cst->getZExtValue(); + + // These two will get lowered to an appropriate EXTRACT_SUBREG in ISel. + if (Val == 0) { + SDValue Vec = Op.getOperand(0); + unsigned Subreg = Hexagon::subreg_loreg; + return DAG.getTargetExtractSubreg(Subreg, dl, ResVT, Vec); + } + + if (ResVT.getVectorNumElements() == Val) { + SDValue Vec = Op.getOperand(0); + unsigned Subreg = Hexagon::subreg_hireg; + return DAG.getTargetExtractSubreg(Subreg, dl, ResVT, Vec); + } + + return SDValue(); +} + +SDValue HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const { + // If we are dealing with EXTRACT_SUBVECTOR on a HVX type, we may + // be able to simplify it to an EXTRACT_SUBREG. + if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && Subtarget.useHVXOps() && + IsHvxVectorType(Op.getValueType().getSimpleVT())) + return LowerEXTRACT_SUBVECTOR_HVX(Op, DAG); + EVT VT = Op.getValueType(); int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; SDLoc dl(Op); @@ -2631,13 +2759,14 @@ HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, llvm_unreachable("Bad offset"); N = DAG.getTargetExtractSubreg(Subreg, dl, MVT::i32, Vec); - } else if (VecVT.getSizeInBits() == 32) { + } else if (SVT.getSizeInBits() == 32) { N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i32, Ops); - } else { + } else if (SVT.getSizeInBits() == 64) { N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i64, Ops); if (VT.getSizeInBits() == 32) N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N); - } + } else + return SDValue(); return DAG.getNode(ISD::BITCAST, dl, VT, N); } @@ -2684,8 +2813,10 @@ HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op, SDValue N; if (VT.getSizeInBits() == 32) N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, Ops); - else + else if (VT.getSizeInBits() == 64) N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, Ops); + else + return SDValue(); return DAG.getNode(ISD::BITCAST, dl, VT, N); } @@ -2708,8 +2839,10 @@ HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op, SDValue N; if (VT.getSizeInBits() == 32) N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops); - else + else if (VT.getSizeInBits() == 64) N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops); + else + return SDValue(); return DAG.getNode(ISD::BITCAST, dl, VT, N); } diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 71f67349bef..5291a8737e2 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -79,6 +79,7 @@ bool isPositiveHalfWord(SDNode *N); EXTRACTU, EXTRACTURP, VCOMBINE, + VPACK, TC_RETURN, EH_RETURN, DCFETCH, @@ -124,7 +125,10 @@ bool isPositiveHalfWord(SDNode *N); const char *getTargetNodeName(unsigned Opcode) const override; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td b/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td index 3b6ca04cbc1..8e984fbf0ec 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td @@ -1659,6 +1659,46 @@ defm V6_vpackoh : T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc; } +def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>, + SDTCisInt<3>]>; + +def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>; + +// 0 as the last argument denotes vpacke. 1 denotes vpacko +def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt), (i32 0))), + (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt), (i32 1))), + (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt), (i32 0))), + (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt), (i32 1))), + (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; + +def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt), (i32 0))), + (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; +def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt), (i32 1))), + (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; +def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt), (i32 0))), + (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; +def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt), (i32 1))), + (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + let hasNewValue = 1, hasSideEffects = 0 in class T_HVX_condALU <string asmString, RegisterClass RC1, RegisterClass RC2> : CVI_VA_Resource1 <(outs RC2:$dst), |

