summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com>, 2018-11-08 19:16:27 +0000
committer: Sanjay Patel <spatel@rotateright.com>, 2018-11-08 19:16:27 +0000
commit: b5535dc7b364671e63d6ab6cd1abb1f8745232bf (patch)
tree: 205c8fe4d991166cc1ec0413ae2fff4fbded8bfc /llvm/lib/Target
parent: 6ce9f72f76e3c1c9c5b1cd5a65ba2b0bb319294f (diff)
download: bcm5719-llvm-b5535dc7b364671e63d6ab6cd1abb1f8745232bf.tar.gz
download: bcm5719-llvm-b5535dc7b364671e63d6ab6cd1abb1f8745232bf.zip
[x86] use shuffles for scalar insertion into high elements of a constant vector
As discussed in D54073, we have a potential regression from more aggressive vector narrowing here, so let's try to avoid that by changing build-vector lowering slightly.

Insert-vector-element lowering always does this since there's no "pinsr" for ymm/zmm:

  // If the vector is wider than 128 bits, extract the 128-bit subvector, insert
  // into that, and then insert the subvector back into the result.

...but we can sometimes do better for insert-into-constant-vector by using shuffle lowering.

Differential Revision: https://reviews.llvm.org/D54271

llvm-svn: 346433
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 22
1 file changed, 18 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 38d3a30cb19..ddc418579e0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8576,9 +8576,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If we are inserting one variable into a vector of non-zero constants, try
// to avoid loading each constant element as a scalar. Load the constants as a
// vector and then insert the variable scalar element. If insertion is not
- // supported, we assume that we will fall back to a shuffle to get the scalar
- // blended with the constants. Insertion into a zero vector is handled as a
- // special-case somewhere below here.
+ // supported, fall back to a shuffle to get the scalar blended with the
+ // constants. Insertion into a zero vector is handled as a special-case
+ // somewhere below here.
if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
(isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||
isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {
@@ -8616,7 +8616,21 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI);
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);
+ unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue();
+ unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits();
+ if (InsertC < NumEltsInLow128Bits)
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);
+
+ // There's no good way to insert into the high elements of a >128-bit
+ // vector, so use shuffles to avoid an extract/insert sequence.
+ assert(VT.getSizeInBits() > 128 && "Invalid insertion index?");
+ assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector");
+ SmallVector<int, 8> ShuffleMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShuffleMask.push_back(i == InsertC ? NumElts : i);
+ SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt);
+ return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask);
}
// Special case for single non-zero, non-undef, element.
OpenPOWER on IntegriCloud