[X86] Handle all versions of vXi1 insert_vector_elt with a constant index without falling back to shuffles.

We previously only supported inserting to the LSB or MSB, where it was easy to zero the target bit and then perform an OR to insert. This change effectively extracts the old value and the new value, xors them together, and then xors that single bit into the correct location in the original vector. This cancels out the old value from the first xor, leaving the new value in that position. The way I've implemented this uses 3 shifts and 2 xors and requires an additional register. We could avoid the additional register at the cost of another shift. llvm-svn: 320120
author: Craig Topper <craig.topper@intel.com> 2017-12-08 00:16:09 +0000
committer: Craig Topper <craig.topper@intel.com> 2017-12-08 00:16:09 +0000
commit: 323ba39f101ca87be4a93e6967ab89ef93071053 (patch)
tree: 6a9d813a251e6dd2e791757c197c73be8ba9ede3 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent: fd86b3cf22c4b429ceb056ef22f33286add83c89 (diff)
download: bcm5719-llvm-323ba39f101ca87be4a93e6967ab89ef93071053.tar.gz
bcm5719-llvm-323ba39f101ca87be4a93e6967ab89ef93071053.zip
1 files changed, 22 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index da3e319f55f..6b2a43db570 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14699,21 +14699,14 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
   // If the kshift instructions of the correct width aren't natively supported
   // then we need to promote the vector to the native size to get the correct
   // zeroing behavior.
-  bool HasNativeShift = true;
   if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) {
-    HasNativeShift = false;
-    // For now don't do this if we are going to end up using the shuffle
-    // below. This minimizes test diffs.
-    // TODO: Remove this restriction once we no longer need a shuffle fallback.
-    if (Vec.isUndef() || IdxVal == 0) {
-      // Need to promote to v16i1, do the insert, then extract back.
-      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
-                        DAG.getUNDEF(MVT::v16i1), Vec,
-                        DAG.getIntPtrConstant(0, dl));
-      Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
-      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
-                         DAG.getIntPtrConstant(0, dl));
-    }
+    // Need to promote to v16i1, do the insert, then extract back.
+    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+                      DAG.getUNDEF(MVT::v16i1), Vec,
+                      DAG.getIntPtrConstant(0, dl));
+    Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
+                       DAG.getIntPtrConstant(0, dl));
   }
 
   SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
@@ -14741,7 +14734,7 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
     return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
   }
   // Insertion of one bit into last position
-  if (HasNativeShift && IdxVal == NumElems - 1) {
+  if (IdxVal == NumElems - 1) {
     // Move the bit to the last position inside the vector.
     EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
                            DAG.getConstant(IdxVal, dl, MVT::i8));
@@ -14754,12 +14747,20 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
     return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
   }
 
-  // Use shuffle to insert element.
-  SmallVector<int, 64> MaskVec(NumElems);
-  for (unsigned i = 0; i != NumElems; ++i)
-    MaskVec[i] = (i == IdxVal) ? NumElems : i;
-
-  return DAG.getVectorShuffle(VecVT, dl, Vec, EltInVec, MaskVec);
+  // Move the current value of the bit to be replace to bit 0.
+  SDValue Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
+                               DAG.getConstant(IdxVal, dl, MVT::i8));
+  // Xor with the new bit.
+  Merged = DAG.getNode(ISD::XOR, dl, VecVT, Merged, EltInVec);
+  // Shift to MSB, filling bottom bits with 0.
+  Merged = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Merged,
+                       DAG.getConstant(NumElems - 1, dl, MVT::i8));
+  // Shift to the final position, filling upper bits with 0.
+  Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Merged,
+                       DAG.getConstant(NumElems - 1 - IdxVal, dl, MVT::i8));
+  // Xor with original vector to cancel out the original bit value that's still
+  // present.
+  return DAG.getNode(ISD::XOR, dl, VecVT, Merged, Vec);
 }
 
 SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
author	Craig Topper <craig.topper@intel.com>	2017-12-08 00:16:09 +0000
committer	Craig Topper <craig.topper@intel.com>	2017-12-08 00:16:09 +0000
commit	323ba39f101ca87be4a93e6967ab89ef93071053 (patch)
tree	6a9d813a251e6dd2e791757c197c73be8ba9ede3 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent	fd86b3cf22c4b429ceb056ef22f33286add83c89 (diff)
download	bcm5719-llvm-323ba39f101ca87be4a93e6967ab89ef93071053.tar.gz bcm5719-llvm-323ba39f101ca87be4a93e6967ab89ef93071053.zip