summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-12-08 00:16:09 +0000
committerCraig Topper <craig.topper@intel.com>2017-12-08 00:16:09 +0000
commit323ba39f101ca87be4a93e6967ab89ef93071053 (patch)
tree6a9d813a251e6dd2e791757c197c73be8ba9ede3 /llvm/lib/Target/X86/X86ISelLowering.cpp
parentfd86b3cf22c4b429ceb056ef22f33286add83c89 (diff)
downloadbcm5719-llvm-323ba39f101ca87be4a93e6967ab89ef93071053.tar.gz
bcm5719-llvm-323ba39f101ca87be4a93e6967ab89ef93071053.zip
[X86] Handle all versions of vXi1 insert_vector_elt with a constant index without falling back to shuffles.
We previously only supported inserting into the LSB or MSB, where it was easy to zero the target bit and then perform an OR to insert. This change effectively extracts the old value and the new value, xors them together, and then xors that single bit into the correct location in the original vector. The second xor cancels out the old value that was folded in by the first xor, leaving the new value in that position. The way I've implemented this uses 3 shifts and two xors and uses an additional register. We can avoid the additional register at the cost of another shift. llvm-svn: 320120
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp43
1 files changed, 22 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index da3e319f55f..6b2a43db570 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14699,21 +14699,14 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
// If the kshift instructions of the correct width aren't natively supported
// then we need to promote the vector to the native size to get the correct
// zeroing behavior.
- bool HasNativeShift = true;
if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) {
- HasNativeShift = false;
- // For now don't do this if we are going to end up using the shuffle
- // below. This minimizes test diffs.
- // TODO: Remove this restriction once we no longer need a shuffle fallback.
- if (Vec.isUndef() || IdxVal == 0) {
- // Need to promote to v16i1, do the insert, then extract back.
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
- DAG.getUNDEF(MVT::v16i1), Vec,
- DAG.getIntPtrConstant(0, dl));
- Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
- DAG.getIntPtrConstant(0, dl));
- }
+ // Need to promote to v16i1, do the insert, then extract back.
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+ DAG.getUNDEF(MVT::v16i1), Vec,
+ DAG.getIntPtrConstant(0, dl));
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
+ DAG.getIntPtrConstant(0, dl));
}
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
@@ -14741,7 +14734,7 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
// Insertion of one bit into last position
- if (HasNativeShift && IdxVal == NumElems - 1) {
+ if (IdxVal == NumElems - 1) {
// Move the bit to the last position inside the vector.
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
@@ -14754,12 +14747,20 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
- // Use shuffle to insert element.
- SmallVector<int, 64> MaskVec(NumElems);
- for (unsigned i = 0; i != NumElems; ++i)
- MaskVec[i] = (i == IdxVal) ? NumElems : i;
-
- return DAG.getVectorShuffle(VecVT, dl, Vec, EltInVec, MaskVec);
+ // Move the current value of the bit to be replaced to bit 0.
+ SDValue Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ // Xor with the new bit.
+ Merged = DAG.getNode(ISD::XOR, dl, VecVT, Merged, EltInVec);
+ // Shift to MSB, filling bottom bits with 0.
+ Merged = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Merged,
+ DAG.getConstant(NumElems - 1, dl, MVT::i8));
+ // Shift to the final position, filling upper bits with 0.
+ Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Merged,
+ DAG.getConstant(NumElems - 1 - IdxVal, dl, MVT::i8));
+ // Xor with original vector to cancel out the original bit value that's still
+ // present.
+ return DAG.getNode(ISD::XOR, dl, VecVT, Merged, Vec);
}
SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
OpenPOWER on IntegriCloud