diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2018-11-08 19:16:27 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2018-11-08 19:16:27 +0000 |
| commit | b5535dc7b364671e63d6ab6cd1abb1f8745232bf (patch) | |
| tree | 205c8fe4d991166cc1ec0413ae2fff4fbded8bfc /llvm/lib/Target | |
| parent | 6ce9f72f76e3c1c9c5b1cd5a65ba2b0bb319294f (diff) | |
| download | bcm5719-llvm-b5535dc7b364671e63d6ab6cd1abb1f8745232bf.tar.gz bcm5719-llvm-b5535dc7b364671e63d6ab6cd1abb1f8745232bf.zip | |
[x86] use shuffles for scalar insertion into high elements of a constant vector
As discussed in D54073, we have a potential regression from more aggressive vector narrowing here, so let's try to avoid that by changing build-vector lowering slightly.
Insert-vector-element lowering always uses the following extract/insert sequence for vectors wider than 128 bits, since there's no "pinsr" for ymm/zmm:
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
// into that, and then insert the subvector back into the result.
...but we can sometimes do better for insert-into-constant-vector by using shuffle lowering.
Differential Revision: https://reviews.llvm.org/D54271
llvm-svn: 346433
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 22 |
1 file changed, 18 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 38d3a30cb19..ddc418579e0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8576,9 +8576,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // If we are inserting one variable into a vector of non-zero constants, try // to avoid loading each constant element as a scalar. Load the constants as a // vector and then insert the variable scalar element. If insertion is not - // supported, we assume that we will fall back to a shuffle to get the scalar - // blended with the constants. Insertion into a zero vector is handled as a - // special-case somewhere below here. + // supported, fall back to a shuffle to get the scalar blended with the + // constants. Insertion into a zero vector is handled as a special-case + // somewhere below here. if (NumConstants == NumElems - 1 && NumNonZero != 1 && (isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) || isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) { @@ -8616,7 +8616,21 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF); SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI); - return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex); + unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue(); + unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits(); + if (InsertC < NumEltsInLow128Bits) + return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex); + + // There's no good way to insert into the high elements of a >128-bit + // vector, so use shuffles to avoid an extract/insert sequence. 
+ assert(VT.getSizeInBits() > 128 && "Invalid insertion index?"); + assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector"); + SmallVector<int, 8> ShuffleMask; + unsigned NumElts = VT.getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) + ShuffleMask.push_back(i == InsertC ? NumElts : i); + SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt); + return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask); } // Special case for single non-zero, non-undef, element. |

