diff options
| author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-12 21:48:26 +0000 |
|---|---|---|
| committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-12 21:48:26 +0000 |
| commit | f15dfe58183e4910c2cfa532d28ea7199279e0ac (patch) | |
| tree | 03c1283f0c9f2e60a980ddf6eae8c880af3fd822 /llvm/lib/Target/X86/X86ISelLowering.cpp | |
| parent | 960c8f71aa9aeadc3ec751dd6b4e15ff0317380c (diff) | |
| download | bcm5719-llvm-f15dfe58183e4910c2cfa532d28ea7199279e0ac.tar.gz bcm5719-llvm-f15dfe58183e4910c2cfa532d28ea7199279e0ac.zip | |
VPERM2F128 is an AVX instruction that permutes between two 256-bit
vectors. It operates on 128-bit elements instead of regular scalar
types. Recognize shuffles that are suitable for VPERM2F128 and teach
the x86 legalizer how to handle them.
llvm-svn: 137519
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 10ab70750e3..4c83a728a55 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2753,6 +2753,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::VPERMILPSY: case X86ISD::VPERMILPD: case X86ISD::VPERMILPDY: + case X86ISD::VPERM2F128: return true; } return false; @@ -2795,6 +2796,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PALIGN: case X86ISD::SHUFPD: case X86ISD::SHUFPS: + case X86ISD::VPERM2F128: return DAG.getNode(Opc, dl, VT, V1, V2, DAG.getConstant(TargetMask, MVT::i8)); } @@ -3033,6 +3035,17 @@ static bool isUndefOrEqual(int Val, int CmpVal) { return false; } +/// isUndefOrInRange - Return true if every element in Mask, begining from +/// position Pos and ending in Pos+Size, falls within the specified sequential +/// range (L, L+Pos]. or is undef. +static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask, + int Pos, int Size, int Low) { + for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low) + if (!isUndefOrEqual(Mask[i], Low)) + return false; + return true; +} + /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference /// the second operand. @@ -3444,6 +3457,67 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) { return ::isMOVLMask(M, N->getValueType(0)); } +/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered +/// as permutations between 128-bit chunks or halves. As an example: this +/// shuffle bellow: +/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15> +/// The first half comes from the second half of V1 and the second half from the +/// the second half of V2. 
+static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT, + const X86Subtarget *Subtarget) { + if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256) + return false; + + // The shuffle result is divided into half A and half B. In total the two + // sources have 4 halves, namely: C, D, E, F. The final values of A and + // B must come from C, D, E or F. + int HalfSize = VT.getVectorNumElements()/2; + bool MatchA = false, MatchB = false; + + // Check if A comes from one of C, D, E, F. + for (int Half = 0; Half < 4; ++Half) { + if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) { + MatchA = true; + break; + } + } + + // Check if B comes from one of C, D, E, F. + for (int Half = 0; Half < 4; ++Half) { + if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) { + MatchB = true; + break; + } + } + + return MatchA && MatchB; +} + +/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VPERM2F128 instructions. +static unsigned getShuffleVPERM2F128Immediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + EVT VT = SVOp->getValueType(0); + + int HalfSize = VT.getVectorNumElements()/2; + + int FstHalf = 0, SndHalf = 0; + for (int i = 0; i < HalfSize; ++i) { + if (SVOp->getMaskElt(i) > 0) { + FstHalf = SVOp->getMaskElt(i)/HalfSize; + break; + } + } + for (int i = HalfSize; i < HalfSize*2; ++i) { + if (SVOp->getMaskElt(i) > 0) { + SndHalf = SVOp->getMaskElt(i)/HalfSize; + break; + } + } + + return (FstHalf | (SndHalf << 4)); +} + /// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to VPERMILPD*. 
/// Note that VPERMIL mask matching is different depending whether theunderlying @@ -4317,6 +4391,11 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask); break; + case X86ISD::VPERM2F128: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); + break; default: assert("not implemented for target shuffle node"); return SDValue(); @@ -6335,6 +6414,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1, getShuffleVPERMILPDImmediate(SVOp), DAG); + // Handle VPERM2F128 permutations + if (isVPERM2F128Mask(M, VT, Subtarget)) + return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2, + getShuffleVPERM2F128Immediate(SVOp), DAG); + //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, // lower it into other known shuffles. FIXME: this isn't true yet, but @@ -10052,6 +10136,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY"; case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD"; case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY"; + case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; @@ -13134,6 +13219,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VPERMILPSY: case X86ISD::VPERMILPD: case X86ISD::VPERMILPDY: + case X86ISD::VPERM2F128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI); } |

