diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-02-01 18:10:30 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-02-01 18:10:30 +0000 |
| commit | 1a8cefc3286a026be0db24fd78bcb610501b446d (patch) | |
| tree | 1c40ead97f570fad428b723cf2ca71f57f64828f /llvm/lib | |
| parent | 6691e112ce615bb580fa857fe4aad4d3ce7b74b7 (diff) | |
| download | bcm5719-llvm-1a8cefc3286a026be0db24fd78bcb610501b446d.tar.gz bcm5719-llvm-1a8cefc3286a026be0db24fd78bcb610501b446d.zip | |
[X86][SSE] LowerBUILD_VECTORAsVariablePermute - add support for scaling index vectors
This allows us to use PSHUFB for v8i16/v4i32 and VPERMD/VPERMPS for v4i64/v4f64 variable shuffles.
Differential Revision: https://reviews.llvm.org/D42487
llvm-svn: 323987
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 49 |
1 files changed, 44 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5b90f7e4a33..10180ba362b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7818,8 +7818,6 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, // TODO: Handle undefs // TODO: Utilize pshufb and zero mask blending to support more efficient // construction of vectors with constant-0 elements. -// TODO: Use smaller-element vectors of same width, and "interpolate" the -// indices, when no native operation available. static SDValue LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -7833,11 +7831,22 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, if (Subtarget.hasSSE3()) Opcode = X86ISD::PSHUFB; break; + case MVT::v8i16: + if (Subtarget.hasVLX() && Subtarget.hasBWI()) + Opcode = X86ISD::VPERMV; + else if (Subtarget.hasSSE3()) { + Opcode = X86ISD::PSHUFB; + ShuffleVT = MVT::v16i8; + } + break; case MVT::v4f32: case MVT::v4i32: if (Subtarget.hasAVX()) { Opcode = X86ISD::VPERMILPV; ShuffleVT = MVT::v4f32; + } else if (Subtarget.hasSSE3()) { + Opcode = X86ISD::PSHUFB; + ShuffleVT = MVT::v16i8; } break; case MVT::v2f64: @@ -7856,6 +7865,10 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, case MVT::v4f64: if (Subtarget.hasVLX()) Opcode = X86ISD::VPERMV; + else if (Subtarget.hasAVX2()) { + Opcode = X86ISD::VPERMV; + ShuffleVT = MVT::v8f32; + } break; case MVT::v16f32: case MVT::v8f64: @@ -7868,7 +7881,6 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, if (Subtarget.hasBWI()) Opcode = X86ISD::VPERMV; break; - case MVT::v8i16: case MVT::v16i16: if (Subtarget.hasVLX() && Subtarget.hasBWI()) Opcode = X86ISD::VPERMV; @@ -7927,8 +7939,8 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, unsigned Opcode = LegalPermuteOpcode(VT, ShuffleVT); if (!Opcode) return SDValue(); - 
assert(VT.getScalarSizeInBits() == ShuffleVT.getScalarSizeInBits() && - VT.getVectorNumElements() == ShuffleVT.getVectorNumElements() && + assert((VT.getSizeInBits() == ShuffleVT.getSizeInBits()) && + (VT.getScalarSizeInBits() % ShuffleVT.getScalarSizeInBits()) == 0 && "Illegal variable permute shuffle type"); unsigned NumElts = VT.getVectorNumElements(); @@ -7950,6 +7962,33 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec))); } + uint64_t Scale = VT.getScalarSizeInBits() / ShuffleVT.getScalarSizeInBits(); + if (Scale > 1) { + assert(isPowerOf2_64(Scale) && "Illegal variable permute shuffle scale"); + unsigned ShuffleBits = ShuffleVT.getScalarSizeInBits(); + uint64_t IndexScale = 0; + uint64_t IndexOffset = 0; + + // If we're scaling a smaller permute op, then we need to repeat the indices, + // scaling and offsetting them as well. + // e.g. v4i32 -> v16i8 (Scale = 4) + // IndexScale = v4i32 Splat(4 << 24 | 4 << 16 | 4 << 8 | 4) + // indexOffset = v4i32 Splat(3 << 24 | 2 << 16 | 1 << 8 | 0) + for (uint64_t i = 0; i != Scale; ++i) { + IndexScale |= Scale << (i * ShuffleBits); + IndexOffset |= i << (i * ShuffleBits); + } + + SDLoc DL(IndicesVec); + IndicesVec = DAG.getNode(ISD::MUL, DL, IndicesVT, IndicesVec, + DAG.getConstant(IndexScale, DL, IndicesVT)); + IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, + DAG.getConstant(IndexOffset, DL, IndicesVT)); + } + + EVT ShuffleIdxVT = EVT(ShuffleVT).changeVectorElementTypeToInteger(); + IndicesVec = DAG.getBitcast(ShuffleIdxVT, IndicesVec); + SrcVec = DAG.getBitcast(ShuffleVT, SrcVec); SDValue Res = Opcode == X86ISD::VPERMV |

