author    Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>  2014-02-12 23:42:28 +0000
committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>  2014-02-12 23:42:28 +0000
commit    386d566395d90b01ddddf3d0ab219097cca8c37c (patch)
tree      f53452ea49b41ace09a4e3a5ace515f7f9b49f87 /llvm/lib
parent    fdde7ccf376c0269929541573650df5075f201b1 (diff)
[X86] Teach the backend how to lower vector shift left into multiply rather than scalarizing it.
Instead of expanding a packed shift into a sequence of scalar shifts, the backend now tries (when possible) to convert the vector shift into a vector multiply.

Before this change, a shift of an MVT::v8i16 vector by a build_vector of constants was always scalarized into a long sequence of "vector extract + scalar shift + vector insert" operations. With this change, if there is SSE2 support, we emit a single vector multiply.

This change also affects SSE4.1, AVX, and AVX2 shifts:
- A shift of an MVT::v4i32 vector by a build_vector of non-uniform constants is now lowered, when possible, into a single SSE4.1 vector multiply.
- A packed v16i16 shift left by a constant build_vector is now expanded, when possible, into a single AVX2 vpmullw.

This change also improves the lowering of AVX512f vector shifts.

Added test CodeGen/X86/vec_shift6.ll with some code examples that are affected by this change.

llvm-svn: 201271
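The rewrite rests on the per-lane identity x << c == x * (1 << c), which holds exactly under the modular arithmetic of fixed-width lanes. The following standalone sketch (my own illustration, not code from this commit) checks that identity for i16 lanes with a non-uniform set of shift amounts, as a constant build_vector would supply:

// Sketch of the identity behind this lowering; not commit code.
#include <cassert>
#include <cstdint>

int main() {
  // Non-uniform per-lane shift amounts, all < 16.
  const uint16_t Amts[8] = {1, 1, 2, 3, 7, 0, 9, 11};
  const uint16_t X[8]    = {3, 5, 7, 9, 11, 13, 15, 17};
  for (int i = 0; i != 8; ++i) {
    uint16_t Shifted = uint16_t(X[i] << Amts[i]);
    // Multiplying by the power of two (1 << Amts[i]) agrees with the
    // shift modulo 2^16, which is exactly packed i16 shift semantics.
    uint16_t Multiplied = uint16_t(X[i] * uint16_t(1u << Amts[i]));
    assert(Shifted == Multiplied);
  }
  return 0;
}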
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 33
1 file changed, 33 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 85656d80914..4ce2ea36c00 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13156,6 +13156,39 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
     return Op;
   }
 
+  // If possible, lower this packed shift into a vector multiply instead of
+  // expanding it into a sequence of scalar shifts.
+  // Do this only if the vector shift count is a constant build_vector.
+  if (Op.getOpcode() == ISD::SHL &&
+      (VT == MVT::v8i16 || VT == MVT::v4i32 ||
+       (Subtarget->hasInt256() && VT == MVT::v16i16)) &&
+      ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
+    SmallVector<SDValue, 8> Elts;
+    EVT SVT = VT.getScalarType();
+    unsigned SVTBits = SVT.getSizeInBits();
+    const APInt &One = APInt(SVTBits, 1);
+    unsigned NumElems = VT.getVectorNumElements();
+
+    for (unsigned i = 0; i != NumElems; ++i) {
+      SDValue Op = Amt->getOperand(i);
+      if (Op->getOpcode() == ISD::UNDEF) {
+        Elts.push_back(Op);
+        continue;
+      }
+
+      ConstantSDNode *ND = cast<ConstantSDNode>(Op);
+      const APInt &C = APInt(SVTBits, ND->getAPIntValue().getZExtValue());
+      uint64_t ShAmt = C.getZExtValue();
+      if (ShAmt >= SVTBits) {
+        Elts.push_back(DAG.getUNDEF(SVT));
+        continue;
+      }
+      Elts.push_back(DAG.getConstant(One.shl(ShAmt), SVT));
+    }
+    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Elts[0], NumElems);
+    return DAG.getNode(ISD::MUL, dl, VT, R, BV);
+  }
+
   // Lower SHL with variable shift amount.
   if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
     Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
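For intuition about the code this lowering selects: on SSE2 the resulting ISD::MUL on v8i16 maps to a single pmullw. The snippet below is my own illustration (not the commit's vec_shift6.ll test); it builds the power-of-two build_vector by hand and applies the one multiply that replaces eight extract/shift/insert sequences:

// Illustration only; assumes an SSE2-capable target.
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  __m128i X = _mm_set1_epi16(3); // eight i16 lanes, all set to 3
  // Powers of two for the shift amounts <1,1,2,3,7,0,9,11>.
  __m128i Pow2 = _mm_setr_epi16(2, 2, 4, 8, 128, 1, 512, 2048);
  __m128i R = _mm_mullo_epi16(X, Pow2); // one pmullw == per-lane shl
  uint16_t Out[8];
  _mm_storeu_si128(reinterpret_cast<__m128i *>(Out), R);
  for (int i = 0; i != 8; ++i)
    printf("%u ", unsigned(Out[i]));
  printf("\n");
  return 0;
}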