summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2015-02-22 18:17:28 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2015-02-22 18:17:28 +0000
commit4e30d9b6d85a0a4a79c9b527f39deff18b302e4f (patch)
tree85a4be0a21acf06d6ecca8b685bf8c3e15730205
parente2dd84e42f037458ce9ab863bbc590c84f20c977 (diff)
downloadbcm5719-llvm-4e30d9b6d85a0a4a79c9b527f39deff18b302e4f.tar.gz
bcm5719-llvm-4e30d9b6d85a0a4a79c9b527f39deff18b302e4f.zip
[DagCombiner] Generalized BuildVector Vector Concatenation
The CONCAT_VECTORS combiner pass can transform the concat of two BUILD_VECTOR nodes into a single BUILD_VECTOR node. This patch generalises this to support any number of BUILD_VECTOR nodes, and also permits UNDEF nodes to be included as well. This was noticed as AVX vec128 -> vec256 canonicalization sometimes creates a CONCAT_VECTOR with a real vec128 lower and an vec128 UNDEF upper. Differential Revision: http://reviews.llvm.org/D7816 llvm-svn: 230177
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp61
-rw-r--r--llvm/test/CodeGen/X86/vector-zext.ll33
2 files changed, 49 insertions, 45 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a762bfd01b5..1f98fde13a8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11430,36 +11430,51 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
}
+ // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+ // We have already tested above for an UNDEF only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
- if (N->getNumOperands() == 2 &&
- N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
- N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+ return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+ };
+ bool AllBuildVectorsOrUndefs =
+ std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
+ if (AllBuildVectorsOrUndefs) {
SmallVector<SDValue, 8> Opnds;
- unsigned BuildVecNumElts = N0.getNumOperands();
-
- EVT SclTy0 = N0.getOperand(0)->getValueType(0);
- EVT SclTy1 = N1.getOperand(0)->getValueType(0);
- if (SclTy0.isFloatingPoint()) {
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
- } else {
+ EVT SVT = VT.getScalarType();
+
+ EVT MinVT = SVT;
+ if (!SVT.isFloatingPoint())
// If BUILD_VECTOR are from built from integer, they may have different
// operand types. Get the smaller type and truncate all operands to it.
- EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N0.getOperand(i)));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N1.getOperand(i)));
+ for (const SDValue &Op : N->ops()) {
+ EVT OpSVT = Op.getValueType().getScalarType();
+ MinVT = MinVT.bitsLE(OpSVT) ? MinVT : OpSVT;
+ }
+
+ for (const SDValue &Op : N->ops()) {
+ EVT OpVT = Op.getValueType();
+ unsigned NumElts = OpVT.getVectorNumElements();
+
+ if (ISD::UNDEF == Op.getOpcode())
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(DAG.getUNDEF(MinVT));
+
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ if (SVT.isFloatingPoint()) {
+ assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(Op.getOperand(i));
+ } else {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+ }
+ }
}
+ assert(VT.getVectorNumElements() == Opnds.size() &&
+ "Concat vector type mismatch");
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 132e17fef4d..568687dfd17 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -358,22 +358,16 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; AVX2-NEXT: vpunpcklwd{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: # kill
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
entry:
%B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
@@ -410,22 +404,17 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,1],xmm1[0,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: # kill
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT: xorl %eax, %eax
-; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
entry:
%B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
OpenPOWER on IntegriCloud