summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Filipe Cabecinhas <me@filcab.net>  2014-05-11 08:12:56 +0000
committer: Filipe Cabecinhas <me@filcab.net>  2014-05-11 08:12:56 +0000
commit: 0e3d1cb5d6df971f02960f4d6433faed7c0b1367 (patch)
tree: e2d5d136aa77ec1c84940cd30ccd99bd57b853b1
parent: 8ab3e83efa434b5a3301802a0c4e9f30b99f999e (diff)
download: bcm5719-llvm-0e3d1cb5d6df971f02960f4d6433faed7c0b1367.tar.gz
download: bcm5719-llvm-0e3d1cb5d6df971f02960f4d6433faed7c0b1367.zip
Fixed a bug when lowering build_vector (PR19694)
When lowering build_vector to an insertps, we would still lower it, even if the source vectors weren't v4x32. This would break on AVX if the source was a v8x32. We now check the type of the source vectors. llvm-svn: 208487
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 11
-rw-r--r--  llvm/test/CodeGen/X86/avx-shuffle.ll    | 18
2 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4ed36906297..616171b2b69 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5458,7 +5458,12 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems,
return SDValue();
SDValue V = FirstNonZero.getOperand(0);
- unsigned FirstNonZeroDst = cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
+ MVT VVT = V.getSimpleValueType();
+ if (VVT != MVT::v4f32 && VVT != MVT::v4i32)
+ return SDValue();
+
+ unsigned FirstNonZeroDst =
+ cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
unsigned CorrectIdx = FirstNonZeroDst == FirstNonZeroIdx;
unsigned IncorrectIdx = CorrectIdx ? -1U : FirstNonZeroIdx;
unsigned IncorrectDst = CorrectIdx ? -1U : FirstNonZeroDst;
@@ -5498,8 +5503,8 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems,
else
ElementMoveMask = IncorrectDst << 6 | IncorrectIdx << 4;
- SDValue InsertpsMask = DAG.getIntPtrConstant(
- ElementMoveMask | (~NonZeros & 0xf));
+ SDValue InsertpsMask =
+ DAG.getIntPtrConstant(ElementMoveMask | (~NonZeros & 0xf));
return DAG.getNode(X86ISD::INSERTPS, dl, VT, V, V, InsertpsMask);
}
diff --git a/llvm/test/CodeGen/X86/avx-shuffle.ll b/llvm/test/CodeGen/X86/avx-shuffle.ll
index e472042b2ce..f407ba4cc16 100644
--- a/llvm/test/CodeGen/X86/avx-shuffle.ll
+++ b/llvm/test/CodeGen/X86/avx-shuffle.ll
@@ -314,3 +314,21 @@ define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
%1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %1
}
+
+;; Ensure we don't use insertps from non v4x32 vectors.
+;; On SSE4.1 it works because bigger vectors use more than 1 register.
+;; On AVX they get passed in a single register.
+;; FIXME: We could probably optimize this case, if we're only using the
+;; first 4 indices.
+define <4 x i32> @insert_from_diff_size(<8 x i32> %x) {
+; CHECK-LABEL: insert_from_diff_size:
+; CHECK-NOT: insertps
+; CHECK: ret
+ %vecext = extractelement <8 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2
+ %a.0 = extractelement <8 x i32> %x, i32 0
+ %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %a.0, i32 3
+ ret <4 x i32> %vecinit3
+}
OpenPOWER on IntegriCloud