From f3ad23551ddf178496892c0e42d6df380ec13c89 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 19 May 2014 13:12:38 +0000 Subject: SDAG: Legalize vector BSWAP into a shuffle if the shuffle is legal but the bswap not. - On ARM/ARM64 we get a vrev because the shuffle matching code is really smart. We still unroll anything that's not v4i32 though. - On X86 we get a pshufb with SSSE3. Required more cleverness in isShuffleMaskLegal. - On PPC we get a vperm for v8i16 and v4i32. v2i64 is unrolled. llvm-svn: 209123 --- .../lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp') diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 6bcbc8e0ff4..898cd29c914 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -63,6 +63,8 @@ class VectorLegalizer { SDValue ExpandUINT_TO_FLOAT(SDValue Op); // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. SDValue ExpandSEXTINREG(SDValue Op); + // Expand bswap of vectors into a shuffle if legal. + SDValue ExpandBSWAP(SDValue Op); // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. SDValue ExpandVSELECT(SDValue Op); @@ -297,6 +299,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case TargetLowering::Expand: if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) Result = ExpandSEXTINREG(Op); + else if (Node->getOpcode() == ISD::BSWAP) + Result = ExpandBSWAP(Op); else if (Node->getOpcode() == ISD::VSELECT) Result = ExpandVSELECT(Op); else if (Node->getOpcode() == ISD::SELECT) @@ -682,6 +686,29 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } +SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { + EVT VT = Op.getValueType(); + + // Generate a byte wise shuffle mask for the BSWAP. + SmallVector ShuffleMask; + int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; + for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) + for (int J = ScalarSizeInBytes - 1; J >= 0; --J) + ShuffleMask.push_back((I * ScalarSizeInBytes) + J); + + EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); + + // Only emit a shuffle if the mask is legal. + if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) + return DAG.UnrollVectorOp(Op.getNode()); + + SDLoc DL(Op); + Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), + ShuffleMask.data()); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. -- cgit v1.2.3