DAGCombiner: Fold pointless truncate, bitcast, buildvector series

(2xi32) (truncate ((2xi64) bitcast (buildvector i32 a, i32 x, i32 b, i32 y))) can be folded into a (2xi32) (buildvector i32 a, i32 b). Such a DAG would cause uneccessary vdup instructions followed by vmovn instructions. We generate this code on ARM NEON for a setcc olt, 2xf64, 2xf64. For example, in the vectorized version of the code below. double A[N]; double B[N]; void test_double_compare_to_double() { int i; for(i=0;i<N;i++) A[i] = (double)(A[i] < B[i]); } radar://13191881 Fixes bug 15283. llvm-svn: 175670
author: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-02-20 21:33:32 +0000
committer: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-02-20 21:33:32 +0000
commit: 3f9568e921d706b0755b5edb2f0649ef8485ef17 (patch)
tree: 898743473e4b0ec73b5d4df6fa24bd9675f3a87b
parent: 64c2e69809bc09db413abc740c0bc881ade2d39e (diff)
download: bcm5719-llvm-3f9568e921d706b0755b5edb2f0649ef8485ef17.tar.gz
bcm5719-llvm-3f9568e921d706b0755b5edb2f0649ef8485ef17.zip
2 files changed, 47 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9d40ff799d0..2777d7c2ebc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5361,6 +5361,38 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     }
   }
 
+  // Fold a series of buildvector, bitcast, and truncate if possible.
+  // For example fold
+  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
+  //   (2xi32 (buildvector x, y)).
+  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
+      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+      N0.getOperand(0).hasOneUse()) {
+
+    SDValue BuildVect = N0.getOperand(0);
+    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
+    EVT TruncVecEltTy = VT.getVectorElementType();
+
+    // Check that the element types match.
+    if (BuildVectEltTy == TruncVecEltTy) {
+      // Now we only need to compute the offset of the truncated elements.
+      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
+      unsigned TruncVecNumElts = VT.getVectorNumElements();
+      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+             "Invalid number of elements");
+
+      SmallVector<SDValue, 8> Opnds;
+      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+        Opnds.push_back(BuildVect.getOperand(i));
+
+      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0],
+                         Opnds.size());
+    }
+  }
+
   // See if we can simplify the input to this truncate through knowledge that
   // only the low bits are being used.
   // For example "trunc (or (shl x, 8), y)" // -> trunc y
diff --git a/llvm/test/CodeGen/ARM/neon_cmp.ll b/llvm/test/CodeGen/ARM/neon_cmp.ll
new file mode 100644
index 00000000000..046b5da2289
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/neon_cmp.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+; bug 15283
+; radar://13191881
+; CHECK: vfcmp
+define void @vfcmp(<2 x double>* %a, <2 x double>* %b) {
+  %wide.load = load <2 x double>* %a, align 4
+  %wide.load2 = load <2 x double>* %b, align 4
+; CHECK-NOT: vdup.32
+; CHECK-NOT: vmovn.i64
+  %v1 = fcmp olt <2 x double> %wide.load, %wide.load2
+  %v2 = zext <2 x i1> %v1 to <2 x i32>
+  %v3 = sitofp <2 x i32> %v2 to <2 x double>
+  store <2 x double> %v3, <2 x double>* %b, align 4
+  ret void
+}
author	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-02-20 21:33:32 +0000
committer	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-02-20 21:33:32 +0000
commit	3f9568e921d706b0755b5edb2f0649ef8485ef17 (patch)
tree	898743473e4b0ec73b5d4df6fa24bd9675f3a87b
parent	64c2e69809bc09db413abc740c0bc881ade2d39e (diff)
download	bcm5719-llvm-3f9568e921d706b0755b5edb2f0649ef8485ef17.tar.gz bcm5719-llvm-3f9568e921d706b0755b5edb2f0649ef8485ef17.zip